
Commit 0bab9e9

pravinbhat and msmygit authored
Connect to Astra via TLS w/o using SCB (#309)
* Implemented connection to Astra via TLS options without SCB
* Replaced hardcoded sides (Origin/Target) with enums
* Cleaned up imports
* Apply suggestions from code review

Co-authored-by: Madhavan <[email protected]>
1 parent d81cdfb commit 0bab9e9

20 files changed, +219 −39 lines changed

RELEASE.md

Lines changed: 3 additions & 0 deletions
@@ -1,4 +1,7 @@
 # Release Notes
+## [4.4.0] - 2024-09-19
+- Added properties `spark.cdm.connect.origin.tls.isAstra` and `spark.cdm.connect.target.tls.isAstra` to allow connecting to Astra DB without using an [SCB](https://docs.datastax.com/en/astra-db-serverless/drivers/secure-connect-bundle.html). This may be needed by enterprises that consider the credentials packaged within an SCB a security risk. TLS properties can now be passed as params, or wrapper scripts (not included) can pull sensitive credentials from a vault service in real time and pass them to CDM.
+
 ## [4.3.10] - 2024-09-12
 - Added property `spark.cdm.trackRun.runId` to support a custom unique identifier for the current run. This can be used by wrapper scripts to pass a known `runId` and then use it to query the `cdm_run_info` and `cdm_run_details` tables.
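For illustration (not part of this commit), the new flag slots in next to the existing TLS properties. A minimal sketch of the origin-side entries in a CDM properties file, with placeholder values, assuming the trustStore property names follow the keyStore naming pattern shown in KnownProperties.java below:

spark.cdm.connect.origin.host                     <astra-db-host>
spark.cdm.connect.origin.port                     <astra-db-port>
spark.cdm.connect.origin.tls.enabled              true
spark.cdm.connect.origin.tls.isAstra              true
spark.cdm.connect.origin.tls.trustStore.path      /path/to/trustStore.jks
spark.cdm.connect.origin.tls.trustStore.password  <truststore-password>
spark.cdm.connect.origin.tls.keyStore.path        /path/to/keyStore.jks
spark.cdm.connect.origin.tls.keyStore.password    <keystore-password>

The same set exists with `target` in place of `origin` for the target cluster.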

src/main/java/com/datastax/cdm/cql/statement/BaseCdmStatement.java

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 import com.datastax.cdm.cql.EnhancedSession;
 import com.datastax.cdm.properties.IPropertyHelper;
 import com.datastax.cdm.schema.CqlTable;
-import com.datastax.oss.driver.api.core.cql.*;
+import com.datastax.oss.driver.api.core.cql.PreparedStatement;

 public class BaseCdmStatement {

src/main/java/com/datastax/cdm/data/CqlConversion.java

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@
 import java.util.*;
 import java.util.stream.Collectors;

-import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

src/main/java/com/datastax/cdm/data/DataUtility.java

Lines changed: 71 additions & 0 deletions
@@ -15,7 +15,13 @@
  */
 package com.datastax.cdm.data;

+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
 import java.util.*;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -26,6 +32,7 @@

 public class DataUtility {
     public static final Logger logger = LoggerFactory.getLogger(CqlConversion.class);
+    protected static final String SCB_FILE_NAME = "_temp_cdm_scb_do_not_touch.zip";

     public static boolean diff(Object obj1, Object obj2) {
         if (obj1 == null && obj2 == null) {
@@ -143,4 +150,68 @@ public static String getMyClassMethodLine(Exception e) {

         return "Unknown";
     }
+
+    public static void deleteGeneratedSCB() {
+        File file = new File(PKFactory.Side.ORIGIN + SCB_FILE_NAME);
+        if (file.exists()) {
+            file.delete();
+        }
+        file = new File(PKFactory.Side.TARGET + SCB_FILE_NAME);
+        if (file.exists()) {
+            file.delete();
+        }
+    }
+
+    public static File generateSCB(String host, String port, String trustStorePassword, String trustStorePath,
+            String keyStorePassword, String keyStorePath, PKFactory.Side side) throws IOException {
+        FileOutputStream fileOutputStream = new FileOutputStream("config.json");
+        String scbJson = new StringBuilder("{\"host\": \"").append(host).append("\", \"port\": ").append(port)
+                .append(", \"keyStoreLocation\": \"./identity.jks\", \"keyStorePassword\": \"").append(keyStorePassword)
+                .append("\", \"trustStoreLocation\": \"./trustStore.jks\", \"trustStorePassword\": \"")
+                .append(trustStorePassword).append("\"}").toString();
+        fileOutputStream.write(scbJson.getBytes());
+        fileOutputStream.close();
+        File configFile = new File("config.json");
+        FilePathAndNewName configFileWithName = new FilePathAndNewName(configFile, "config.json");
+        FilePathAndNewName keyFileWithName = new FilePathAndNewName(new File(keyStorePath), "identity.jks");
+        FilePathAndNewName trustFileWithName = new FilePathAndNewName(new File(trustStorePath), "trustStore.jks");
+        File zipFile = zip(Arrays.asList(configFileWithName, keyFileWithName, trustFileWithName), side + SCB_FILE_NAME);
+        configFile.delete();
+
+        return zipFile;
+    }
+
+    private static File zip(List<FilePathAndNewName> files, String filename) {
+        File zipfile = new File(filename);
+        byte[] buf = new byte[1024];
+        try {
+            ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipfile));
+            for (int i = 0; i < files.size(); i++) {
+                out.putNextEntry(new ZipEntry(files.get(i).name));
+                FileInputStream in = new FileInputStream(files.get(i).file.getCanonicalPath());
+                int len;
+                while ((len = in.read(buf)) > 0) {
+                    out.write(buf, 0, len);
+                }
+                out.closeEntry();
+                in.close();
+            }
+            out.close();
+
+            return zipfile;
+        } catch (IOException ex) {
+            logger.error("Unable to write out zip file: {}. Got exception: {}", filename, ex.getMessage());
+        }
+        return null;
+    }
+
+    static class FilePathAndNewName {
+        File file;
+        String name;
+
+        public FilePathAndNewName(File file, String name) {
+            this.file = file;
+            this.name = name;
+        }
+    }
 }
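For reference, generateSCB() above writes a config.json of the following shape into the temporary zip (placeholder values shown; the real ones come from the connection properties). The supplied keystore and truststore files are copied into the zip as identity.jks and trustStore.jks, matching the locations referenced here:

{"host": "<host>", "port": <port>, "keyStoreLocation": "./identity.jks", "keyStorePassword": "<keyStorePassword>", "trustStoreLocation": "./trustStore.jks", "trustStorePassword": "<trustStorePassword>"}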

src/main/java/com/datastax/cdm/job/AbstractJobSession.java

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,7 @@
 import org.slf4j.LoggerFactory;

 import com.datastax.cdm.cql.EnhancedSession;
+import com.datastax.cdm.data.DataUtility;
 import com.datastax.cdm.data.PKFactory;
 import com.datastax.cdm.feature.Feature;
 import com.datastax.cdm.feature.Featureset;
@@ -114,6 +115,7 @@ public synchronized void initCdmRun(long runId, long prevRunId, Collection<Split
     public synchronized void printCounts(boolean isFinal) {
         if (isFinal) {
             jobCounter.printFinal(runId, trackRunFeature);
+            DataUtility.deleteGeneratedSCB();
         } else {
             jobCounter.printProgress();
         }

src/main/java/com/datastax/cdm/job/GuardrailCheckJobSession.java

Lines changed: 2 additions & 1 deletion
@@ -26,7 +26,8 @@
 import com.datastax.cdm.data.PKFactory;
 import com.datastax.cdm.data.Record;
 import com.datastax.oss.driver.api.core.CqlSession;
-import com.datastax.oss.driver.api.core.cql.*;
+import com.datastax.oss.driver.api.core.cql.ResultSet;
+import com.datastax.oss.driver.api.core.cql.Row;

 public class GuardrailCheckJobSession extends AbstractJobSession<SplitPartitions.Partition> {

src/main/java/com/datastax/cdm/properties/KnownProperties.java

Lines changed: 6 additions & 0 deletions
@@ -279,6 +279,7 @@ public enum PropertyType {
     public static final String ORIGIN_TLS_KEYSTORE_PATH = "spark.cdm.connect.origin.tls.keyStore.path";
     public static final String ORIGIN_TLS_KEYSTORE_PASSWORD = "spark.cdm.connect.origin.tls.keyStore.password";
     public static final String ORIGIN_TLS_ALGORITHMS = "spark.cdm.connect.origin.tls.enabledAlgorithms"; // TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
+    public static final String ORIGIN_TLS_IS_ASTRA = "spark.cdm.connect.origin.tls.isAstra";
     static {
         types.put(ORIGIN_TLS_ENABLED, PropertyType.BOOLEAN);
         defaults.put(ORIGIN_TLS_ENABLED, "false");
@@ -290,6 +291,8 @@ public enum PropertyType {
         types.put(ORIGIN_TLS_KEYSTORE_PASSWORD, PropertyType.STRING);
         types.put(ORIGIN_TLS_ALGORITHMS, PropertyType.STRING); // This is a list but it is handled by Spark
         defaults.put(ORIGIN_TLS_ALGORITHMS, "TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA");
+        types.put(ORIGIN_TLS_IS_ASTRA, PropertyType.BOOLEAN);
+        defaults.put(ORIGIN_TLS_IS_ASTRA, "false");
     }

     // ==========================================================================
@@ -302,6 +305,7 @@ public enum PropertyType {
     public static final String TARGET_TLS_KEYSTORE_PATH = "spark.cdm.connect.target.tls.keyStore.path";
     public static final String TARGET_TLS_KEYSTORE_PASSWORD = "spark.cdm.connect.target.tls.keyStore.password";
     public static final String TARGET_TLS_ALGORITHMS = "spark.cdm.connect.target.tls.enabledAlgorithms"; // TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
+    public static final String TARGET_TLS_IS_ASTRA = "spark.cdm.connect.target.tls.isAstra";
     static {
         types.put(TARGET_TLS_ENABLED, PropertyType.BOOLEAN);
         defaults.put(TARGET_TLS_ENABLED, "false");
@@ -313,6 +317,8 @@ public enum PropertyType {
         types.put(TARGET_TLS_KEYSTORE_PASSWORD, PropertyType.STRING);
         types.put(TARGET_TLS_ALGORITHMS, PropertyType.STRING); // This is a list but it is handled by Spark
         defaults.put(TARGET_TLS_ALGORITHMS, "TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA");
+        types.put(TARGET_TLS_IS_ASTRA, PropertyType.BOOLEAN);
+        defaults.put(TARGET_TLS_IS_ASTRA, "false");
     }

     // ==========================================================================

src/main/scala/com/datastax/cdm/job/BaseJob.scala

Lines changed: 3 additions & 2 deletions
@@ -21,6 +21,7 @@ import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession
 import org.slf4j.LoggerFactory
+import com.datastax.cdm.data.PKFactory.Side

 import java.math.BigInteger
 import java.util
@@ -70,8 +71,8 @@ abstract class BaseJob[T: ClassTag] extends App {

   consistencyLevel = propertyHelper.getString(KnownProperties.READ_CL)
   val connectionFetcher = new ConnectionFetcher(sContext, propertyHelper)
-  originConnection = connectionFetcher.getConnection("ORIGIN", consistencyLevel)
-  targetConnection = connectionFetcher.getConnection("TARGET", consistencyLevel)
+  originConnection = connectionFetcher.getConnection(Side.ORIGIN, consistencyLevel)
+  targetConnection = connectionFetcher.getConnection(Side.TARGET, consistencyLevel)

   val hasRandomPartitioner: Boolean = {
     val partitionerName = originConnection.withSessionDo(_.getMetadata.getTokenMap.get().getPartitionerName)

src/main/scala/com/datastax/cdm/job/ConnectionDetails.scala

Lines changed: 2 additions & 1 deletion
@@ -27,5 +27,6 @@ case class ConnectionDetails(
   trustStoreType: String,
   keyStorePath: String,
   keyStorePassword: String,
-  enabledAlgorithms: String
+  enabledAlgorithms: String,
+  isAstra: Boolean
 )

src/main/scala/com/datastax/cdm/job/ConnectionFetcher.scala

Lines changed: 24 additions & 9 deletions
@@ -15,17 +15,19 @@
  */
 package com.datastax.cdm.job

-import com.datastax.cdm.properties.{KnownProperties, PropertyHelper}
+import com.datastax.cdm.properties.{KnownProperties, IPropertyHelper}
 import com.datastax.spark.connector.cql.CassandraConnector
 import org.apache.spark.{SparkConf, SparkContext}
 import org.slf4j.{Logger, LoggerFactory}
+import com.datastax.cdm.data.DataUtility.generateSCB
+import com.datastax.cdm.data.PKFactory.Side

 // TODO: CDM-31 - add localDC configuration support
-class ConnectionFetcher(sparkContext: SparkContext, propertyHelper: PropertyHelper) {
+class ConnectionFetcher(sparkContext: SparkContext, propertyHelper: IPropertyHelper) {
   val logger: Logger = LoggerFactory.getLogger(this.getClass.getName)

-  def getConnectionDetails(side: String): ConnectionDetails = {
-    if ("ORIGIN".equals(side.toUpperCase)) {
+  def getConnectionDetails(side: Side): ConnectionDetails = {
+    if (Side.ORIGIN.equals(side)) {
       ConnectionDetails(
         propertyHelper.getAsString(KnownProperties.CONNECT_ORIGIN_SCB),
         propertyHelper.getAsString(KnownProperties.CONNECT_ORIGIN_HOST),
@@ -35,10 +37,11 @@ class ConnectionFetcher(sparkContext: SparkContext, propertyHelper: PropertyHelp
         propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_ENABLED),
         propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_TRUSTSTORE_PATH),
         propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_TRUSTSTORE_PASSWORD),
-        propertyHelper.getString(KnownProperties.ORIGIN_TLS_TRUSTSTORE_TYPE),
+        propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_TRUSTSTORE_TYPE),
         propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_KEYSTORE_PATH),
         propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_KEYSTORE_PASSWORD),
-        propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_ALGORITHMS)
+        propertyHelper.getAsString(KnownProperties.ORIGIN_TLS_ALGORITHMS),
+        propertyHelper.getBoolean(KnownProperties.ORIGIN_TLS_IS_ASTRA)
       )
     }
     else {
@@ -51,15 +54,16 @@ class ConnectionFetcher(sparkContext: SparkContext, propertyHelper: PropertyHelp
         propertyHelper.getAsString(KnownProperties.TARGET_TLS_ENABLED),
         propertyHelper.getAsString(KnownProperties.TARGET_TLS_TRUSTSTORE_PATH),
         propertyHelper.getAsString(KnownProperties.TARGET_TLS_TRUSTSTORE_PASSWORD),
-        propertyHelper.getString(KnownProperties.TARGET_TLS_TRUSTSTORE_TYPE),
+        propertyHelper.getAsString(KnownProperties.TARGET_TLS_TRUSTSTORE_TYPE),
         propertyHelper.getAsString(KnownProperties.TARGET_TLS_KEYSTORE_PATH),
         propertyHelper.getAsString(KnownProperties.TARGET_TLS_KEYSTORE_PASSWORD),
-        propertyHelper.getAsString(KnownProperties.TARGET_TLS_ALGORITHMS)
+        propertyHelper.getAsString(KnownProperties.TARGET_TLS_ALGORITHMS),
+        propertyHelper.getBoolean(KnownProperties.TARGET_TLS_IS_ASTRA)
       )
     }
   }

-  def getConnection(side: String, consistencyLevel: String): CassandraConnector = {
+  def getConnection(side: Side, consistencyLevel: String): CassandraConnector = {
     val connectionDetails = getConnectionDetails(side)
     val config: SparkConf = sparkContext.getConf

@@ -72,6 +76,17 @@ class ConnectionFetcher(sparkContext: SparkContext, propertyHelper: PropertyHelp
         .set("spark.cassandra.auth.password", connectionDetails.password)
         .set("spark.cassandra.input.consistency.level", consistencyLevel)
         .set("spark.cassandra.connection.config.cloud.path", connectionDetails.scbPath))
+    } else if (connectionDetails.trustStorePath.nonEmpty && connectionDetails.isAstra) {
+      logger.info("Connecting to Astra "+side+" (with truststore) using host metadata at "+connectionDetails.host+":"+connectionDetails.port);
+
+      val scbFile = generateSCB(connectionDetails.host, connectionDetails.port,
+        connectionDetails.trustStorePassword, connectionDetails.trustStorePath,
+        connectionDetails.keyStorePassword, connectionDetails.keyStorePath, side)
+      return CassandraConnector(config
+        .set("spark.cassandra.auth.username", connectionDetails.username)
+        .set("spark.cassandra.auth.password", connectionDetails.password)
+        .set("spark.cassandra.input.consistency.level", consistencyLevel)
+        .set("spark.cassandra.connection.config.cloud.path", "file://" + scbFile.getAbsolutePath()))
     } else if (connectionDetails.trustStorePath.nonEmpty) {
       logger.info("Connecting to "+side+" (with truststore) at "+connectionDetails.host+":"+connectionDetails.port);

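Putting the pieces together, a minimal sketch of the new call path (mirroring BaseJob.scala above; connectionFetcher, propertyHelper, and consistencyLevel are assumed to be initialized as in that file):

import com.datastax.cdm.data.PKFactory.Side

// When spark.cdm.connect.origin.tls.isAstra=true and a truststore path is set,
// getConnection() builds a temporary SCB zip via DataUtility.generateSCB() and
// points spark.cassandra.connection.config.cloud.path at it.
val originConnection = connectionFetcher.getConnection(Side.ORIGIN, consistencyLevel)
val targetConnection = connectionFetcher.getConnection(Side.TARGET, consistencyLevel)

// The generated <SIDE>_temp_cdm_scb_do_not_touch.zip files are cleaned up at the
// end of the run by DataUtility.deleteGeneratedSCB() (see AbstractJobSession above).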