@@ -7,8 +7,8 @@ package com.datastax.bdp.spark
7
7
8
8
import java .io .IOException
9
9
10
- import scala . collection . JavaConverters . _
11
- import com .datastax .dse .driver .api .core .cql .continuous .{ ContinuousResultSet , ContinuousSession }
10
+ import com . datastax . dse . driver . api . core . config . DseDriverOption
11
+ import com .datastax .dse .driver .api .core .cql .continuous .ContinuousResultSet
12
12
import com .datastax .oss .driver .api .core .CqlSession
13
13
import com .datastax .oss .driver .api .core .`type` .codec .TypeCodec
14
14
import com .datastax .oss .driver .api .core .cql .{BoundStatement , Statement }
@@ -19,62 +19,52 @@ import com.datastax.spark.connector.rdd.ReadConf
19
19
import com .datastax .spark .connector .util .DriverUtil .toName
20
20
import com .datastax .spark .connector .util ._
21
21
22
+ import scala .collection .JavaConverters ._
23
+
22
24
case class ContinuousPagingScanner (
23
25
readConf : ReadConf ,
24
26
connConf : CassandraConnectorConf ,
25
- columnNames : IndexedSeq [String ]) extends Scanner with Logging {
27
+ columnNames : IndexedSeq [String ],
28
+ cqlSession : CqlSession ) extends Scanner with Logging {
26
29
27
30
val TARGET_PAGE_SIZE_IN_BYTES : Int = 5000 * 50 // 5000 rows * 50 bytes per row
28
31
val MIN_PAGES_PER_SECOND = 1000
29
32
30
- // TODO This must be moved to session initialization? We can no longer pass options at execution time without deriving a new profile
31
- // I think the right thing to do, to support old configurations as well as new is to create a new profile based on options as
32
- // Set using derived profiles, but this probably can't happen here
33
- /**
34
- private val cpOptions = readConf.throughputMiBPS match {
33
+ private lazy val cpProfile = readConf.throughputMiBPS match {
35
34
case Some (throughput) =>
36
- val bytesPerSecond = (throughput * 1024 * 1024).toInt
35
+ val bytesPerSecond = (throughput * 1024 * 1024 ).toLong
37
36
val fallBackPagesPerSecond = math.max(MIN_PAGES_PER_SECOND , bytesPerSecond / TARGET_PAGE_SIZE_IN_BYTES )
38
- val pagesPerSecond: Int = readConf.readsPerSec.getOrElse(fallBackPagesPerSecond)
37
+ val pagesPerSecond = readConf.readsPerSec.map(_.toLong) .getOrElse(fallBackPagesPerSecond)
39
38
if (readConf.readsPerSec.isEmpty) {
40
39
logInfo(s " Using a pages per second of $pagesPerSecond since " +
41
40
s " ${ReadConf .ReadsPerSecParam .name} is not set " )
42
41
}
43
- val bytesPerPage = ( bytesPerSecond / pagesPerSecond ).toInt
42
+ val bytesPerPage = bytesPerSecond / pagesPerSecond
44
43
45
- if (bytesPerPage <= 0) {
44
+ if (bytesPerPage <= 0 || bytesPerPage > Int . MaxValue ) {
46
45
throw new IllegalArgumentException (
47
46
s """ Read Throttling set to $throughput MBPS, but with the current
48
47
| ${ReadConf .ReadsPerSecParam .name} value of $pagesPerSecond that equates to
49
- | $bytesPerPage bytes per page. This number must be positive and non-zero.
48
+ | $bytesPerPage bytes per page.
49
+ | This number must be positive, non-zero and smaller than ${Int .MaxValue }.
50
50
""" .stripMargin)
51
51
}
52
52
53
53
logDebug(s " Read Throttling set to $throughput mbps. Pages of $bytesPerPage with ${readConf.readsPerSec} max " +
54
54
s " pages per second. ${ReadConf .FetchSizeInRowsParam .name} will be ignored. " )
55
- ContinuousPagingOptions
56
- .builder()
57
- .withPageSize(bytesPerPage, ContinuousPagingOptions.PageUnit.BYTES)
58
- .withMaxPagesPerSecond(pagesPerSecond)
59
- .build()
55
+ cqlSession.getContext.getConfig.getDefaultProfile
56
+ .withBoolean(DseDriverOption .CONTINUOUS_PAGING_PAGE_SIZE_BYTES , true )
57
+ .withInt(DseDriverOption .CONTINUOUS_PAGING_PAGE_SIZE , bytesPerPage.toInt)
58
+ .withInt(DseDriverOption .CONTINUOUS_PAGING_MAX_PAGES_PER_SECOND , pagesPerSecond.toInt)
60
59
61
60
case None =>
62
- ContinuousPagingOptions
63
- .builder()
64
- .withPageSize(readConf.fetchSizeInRows, ContinuousPagingOptions.PageUnit.ROWS)
65
- .withMaxPagesPerSecond(readConf.readsPerSec.getOrElse(Integer.MAX_VALUE))
66
- .build()
61
+ cqlSession.getContext.getConfig.getDefaultProfile
62
+ .withBoolean(DseDriverOption .CONTINUOUS_PAGING_PAGE_SIZE_BYTES , false )
63
+ .withInt(DseDriverOption .CONTINUOUS_PAGING_PAGE_SIZE , readConf.fetchSizeInRows)
64
+ .withInt(DseDriverOption .CONTINUOUS_PAGING_MAX_PAGES_PER_SECOND , readConf.readsPerSec.getOrElse(0 ))
67
65
}
68
- **/
69
-
70
- /**
71
- * Attempts to get or create a session for this execution thread.
72
- */
73
- private val cpSession = new CassandraConnector (connConf)
74
- .openSession()
75
- .asInstanceOf [CqlSession with ContinuousSession ]
76
66
77
- private val codecRegistry = cpSession .getContext.getCodecRegistry
67
+ private val codecRegistry = cqlSession .getContext.getCodecRegistry
78
68
79
69
private def asBoundStatement (statement : Statement [_]): Option [BoundStatement ] = {
80
70
statement match {
@@ -91,22 +81,22 @@ case class ContinuousPagingScanner(
91
81
* Calls SessionProxy Close which issues a deferred close request on the session if no
92
82
* references are requested to it in the next keep_alive ms
93
83
*/
94
- override def close (): Unit = cpSession .close
84
+ override def close (): Unit = cqlSession .close()
95
85
96
- override def getSession (): CqlSession = cpSession
86
+ override def getSession (): CqlSession = cqlSession
97
87
98
88
override def scan [StatementT <: Statement [StatementT ]](statement : StatementT ): ScanResult = {
99
- val authStatement = maybeExecutingAs(statement, readConf.executeAs)
89
+ val authStatement = maybeExecutingAs(statement, readConf.executeAs)
100
90
101
91
if (isSolr(authStatement)) {
102
92
logDebug(" Continuous Paging doesn't work with Search, Falling back to default paging" )
103
- val regularResult = cpSession .execute(authStatement)
93
+ val regularResult = cqlSession .execute(authStatement)
104
94
val regularIterator = regularResult.iterator().asScala
105
95
ScanResult (regularIterator, CassandraRowMetadata .fromResultSet(columnNames, regularResult, codecRegistry))
106
96
107
97
} else {
108
98
try {
109
- val cpResult = cpSession .executeContinuously(authStatement)
99
+ val cpResult = cqlSession .executeContinuously(authStatement.setExecutionProfile(cpProfile) )
110
100
val cpIterator = cpResult.iterator().asScala
111
101
ScanResult (cpIterator, getMetaData(cpResult))
112
102
} catch {
@@ -120,7 +110,7 @@ case class ContinuousPagingScanner(
120
110
}
121
111
}
122
112
123
- private def getMetaData (result : ContinuousResultSet ) = {
113
+ private def getMetaData (result : ContinuousResultSet ): CassandraRowMetadata = {
124
114
import scala .collection .JavaConverters ._
125
115
val columnDefs = result.getColumnDefinitions.asScala
126
116
val rsColumnNames = columnDefs.map(c => toName(c.getName))
@@ -131,3 +121,16 @@ case class ContinuousPagingScanner(
131
121
CassandraRowMetadata (columnNames, Some (rsColumnNames.toIndexedSeq), codecs.toIndexedSeq)
132
122
}
133
123
}
124
+
125
+ object ContinuousPagingScanner {
126
+ def apply (
127
+ readConf : ReadConf ,
128
+ connConf : CassandraConnectorConf ,
129
+ columnNames : IndexedSeq [String ]): ContinuousPagingScanner = {
130
+ /**
131
+ * Attempts to get or create a session for this execution thread.
132
+ */
133
+ val cqlSession = new CassandraConnector (connConf).openSession()
134
+ new ContinuousPagingScanner (readConf, connConf, columnNames, cqlSession)
135
+ }
136
+ }
0 commit comments