diff --git a/.circleci/config.yml b/.circleci/config.yml
index d7f638fa..608b3270 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,14 +1,19 @@
version: 2.1
jobs:
analytics-core-build:
- machine: true
+ machine:
+ image: ubuntu-2004:202008-01
steps:
- checkout
- restore_cache:
key: dp-dependency-cache-{{ checksum "pom.xml" }}
- run:
name: sunbird-analytics-core-build
- command: mvn scoverage:report
+ command: |
+ export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ mvn scoverage:report
- save_cache:
key: dp-dependency-cache-{{ checksum "pom.xml" }}
paths: ~/.m2
@@ -20,10 +25,11 @@ jobs:
- run:
name: sonar
command: |
+ export JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk-amd64
mvn -X sonar:sonar -Dsonar.projectKey=project-sunbird_sunbird-analytics-core -Dsonar.organization=project-sunbird -Dsonar.exclusions=analytics-core/src/main/scala/org/ekstep/analytics/streaming/** -Dsonar.host.url=https://sonarcloud.io -Dsonar.scala.coverage.reportPaths=/home/circleci/project/target/scoverage.xml
workflows:
version: 2.1
workflow:
jobs:
- - analytics-core-build
\ No newline at end of file
+ - analytics-core-build
diff --git a/.gitignore b/.gitignore
index 2673f6a3..e0c5ae3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ output-*
dependency-reduced-pom.xml
**/target
**/logs
+**/.idea/**
+*.iml
diff --git a/Jenkinsfile b/Jenkinsfile
index 4b87ace2..8a963927 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,8 +32,9 @@ node('build-slave') {
}
stage('Archive artifacts'){
sh """
- mkdir lpa_artifacts
+ mkdir lpa_core_artifacts
cp analytics-job-driver/target/analytics-framework-2.0.jar lpa_core_artifacts
+ cp analytics-core/lib/scruid*.jar lpa_core_artifacts
zip -j lpa_core_artifacts.zip:${artifact_version} lpa_core_artifacts/*
"""
archiveArtifacts artifacts: "lpa_core_artifacts.zip:${artifact_version}", fingerprint: true, onlyIfSuccessful: true
diff --git a/analytics-core/lib/scruid_2.11-2.4.0.jar b/analytics-core/lib/scruid_2.11-2.4.0.jar
new file mode 100644
index 00000000..61e36184
Binary files /dev/null and b/analytics-core/lib/scruid_2.11-2.4.0.jar differ
diff --git a/analytics-core/pom.xml b/analytics-core/pom.xml
index 4d786df9..a7137dbe 100644
--- a/analytics-core/pom.xml
+++ b/analytics-core/pom.xml
@@ -103,8 +103,7 @@
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
- <version>4.5.2</version>
- <scope>provided</scope>
+ <version>4.5.6</version>
<groupId>com.typesafe</groupId>
@@ -248,12 +247,75 @@
<artifactId>commons-text</artifactId>
<version>1.6</version>
</dependency>
+ <dependency>
+ <groupId>com.typesafe.scala-logging</groupId>
+ <artifactId>scala-logging_2.11</artifactId>
+ <version>3.6.0</version>
+ </dependency>
<dependency>
<groupId>ing.wbaa.druid</groupId>
<artifactId>scruid_${scala.maj.version}</artifactId>
- <version>2.3.0</version>
+ <version>2.4.0</version>
+ <scope>system</scope>
+ <systemPath>${project.basedir}/lib/scruid_2.11-2.4.0.jar</systemPath>
</dependency>
+ <dependency>
+ <groupId>io.circe</groupId>
+ <artifactId>circe-core_2.11</artifactId>
+ <version>0.11.2</version>
+ </dependency>
+ <dependency>
+ <groupId>io.circe</groupId>
+ <artifactId>circe-parser_2.11</artifactId>
+ <version>0.11.2</version>
+ </dependency>
+ <dependency>
+ <groupId>io.circe</groupId>
+ <artifactId>circe-generic_2.11</artifactId>
+ <version>0.11.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mdedetrich</groupId>
+ <artifactId>akka-stream-json_2.11</artifactId>
+ <version>0.4.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mdedetrich</groupId>
+ <artifactId>akka-http-json_2.11</artifactId>
+ <version>0.4.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mdedetrich</groupId>
+ <artifactId>akka-stream-circe_2.11</artifactId>
+ <version>0.4.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.mdedetrich</groupId>
+ <artifactId>akka-http-circe_2.11</artifactId>
+ <version>0.4.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.typesafe.akka</groupId>
+ <artifactId>akka-http_2.11</artifactId>
+ <version>10.1.9</version>
+ </dependency>
+ <dependency>
+ <groupId>io.circe</groupId>
+ <artifactId>circe-java8_2.11</artifactId>
+ <version>0.11.1</version>
+ </dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
@@ -272,6 +334,12 @@
<artifactId>postgresql</artifactId>
<version>9.1-901.jdbc4</version>
</dependency>
+ <dependency>
+ <groupId>io.zonky.test</groupId>
+ <artifactId>embedded-postgres</artifactId>
+ <version>1.2.6</version>
+ <scope>test</scope>
+ </dependency>
@@ -290,6 +358,8 @@
<goal>testCompile</goal>
+ <source>8</source>
+ <target>8</target>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala
index 8a3270a8..a14ef530 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala
@@ -7,11 +7,11 @@ import org.apache.spark.streaming.dstream.DStream
import org.ekstep.analytics.framework.Level.INFO
import org.ekstep.analytics.framework.exception.DataFetcherException
import org.ekstep.analytics.framework.fetcher.{AzureDataFetcher, DruidDataFetcher, S3DataFetcher}
-import org.ekstep.analytics.framework.util.{JSONUtils, JobLogger}
+import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger}
/**
- * @author Santhosh
- */
+ * @author Santhosh
+ */
object DataFetcher {
implicit val className = "org.ekstep.analytics.framework.DataFetcher"
@@ -33,37 +33,59 @@ object DataFetcher {
AzureDataFetcher.getObjectKeys(search.queries.get);
case "local" =>
JobLogger.log("Fetching the batch data from Local file")
- search.queries.get.map { x => x.file.getOrElse("") }.filterNot { x => x == null };
+ search.queries.get.map { x => x.file.getOrElse(null) }.filterNot { x => x == null };
case "druid" =>
JobLogger.log("Fetching the batch data from Druid")
val data = DruidDataFetcher.getDruidData(search.druidQuery.get)
+ // $COVERAGE-OFF$
+ // Disabling scoverage for the block below: DruidDataFetcher is an object and cannot be mocked, and an embedded Druid is not yet available for tests
val druidDataList = data.map(f => JSONUtils.deserialize[T](f))
- return sc.parallelize(druidDataList);
+ return druidDataList
+ // $COVERAGE-ON$
case _ =>
throw new DataFetcherException("Unknown fetcher type found");
}
+
if (null == keys || keys.length == 0) {
return sc.parallelize(Seq[T](), JobContext.parallelization);
}
JobLogger.log("Deserializing Input Data", None, INFO);
+ val filteredKeys = search.queries.get.map{q =>
+ getFilteredKeys(q, keys, q.partitions)
+ }.flatMap(f => f)
+
val isString = mf.runtimeClass.getName.equals("java.lang.String");
- sc.textFile(keys.mkString(","), JobContext.parallelization).map { line => {
+ val inputEventsCount = fc.inputEventsCount;
+ sc.textFile(filteredKeys.mkString(","), JobContext.parallelization).map { line => {
try {
+ inputEventsCount.add(1);
if (isString) line.asInstanceOf[T] else JSONUtils.deserialize[T](line);
} catch {
case ex: Exception =>
JobLogger.log(ex.getMessage, None, INFO);
null.asInstanceOf[T]
- }
}
+ }
}.filter { x => x != null };
}
/**
- * API to fetch the streaming data given an array of query objects
- */
+ * API to fetch the streaming data given an array of query objects
+ */
def fetchStreamData[T](sc: StreamingContext, search: Fetcher)(implicit mf: Manifest[T]): DStream[T] = {
null;
}
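+ /**
+  * Filters object keys by partition: keeps only keys whose names contain a "yyyy-MM-dd-<partition>-" segment
+  * for one of the requested partitions. Returns all keys when no partitions are specified.
+  */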
+ def getFilteredKeys(query: Query, keys: Array[String], partitions: Option[List[Int]]): Array[String] = {
+ if (partitions.nonEmpty) {
+ val finalKeys = keys.map{f =>
+ partitions.get.map{p =>
+ val reg = raw"(\d{4})-(\d{2})-(\d{2})-$p-".r.findFirstIn(f)
+ if(reg.nonEmpty && f.contains(reg.get)) f else ""
+ }
+ }.flatMap(f => f)
+ finalKeys.filter(f => f.nonEmpty)
+ }
+ else keys
+ }
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala
index 3b8dd775..bca8cb86 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala
@@ -133,16 +133,6 @@ object DataFilter {
val eventMap = CommonUtil.caseClassToMap(event)
CommonUtil.getTimestamp(eventMap.get("$attimestamp").get.asInstanceOf[String]).asInstanceOf[AnyRef];
}
- case "gameId" =>
- val gid = getBeanProperty(event, "edata.eks.gid");
- if (null == gid)
- getBeanProperty(event, "gdata.id");
- else
- gid;
- case "genieTag" =>
- val tags = if(event.isInstanceOf[Event]) CommonUtil.getETags(event.asInstanceOf[Event]).app else getBeanProperty(event, "etags").asInstanceOf[ETags].app;
- if (tags.isDefined) tags.get else List()
- case "gameVersion" => getBeanProperty(event, "gdata.ver");
case "userId" => getBeanProperty(event, "uid");
case "sessionId" => getBeanProperty(event, "sid");
case "telemetryVersion" => getBeanProperty(event, "ver");
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala
index b34e6ea9..88308508 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala
@@ -1,17 +1,27 @@
package org.ekstep.analytics.framework
-import ing.wbaa.druid.DruidConfig
+import ing.wbaa.druid.{DruidConfig, QueryHost}
import ing.wbaa.druid.client.DruidClient
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.apache.spark.sql.types.StructType
import org.sunbird.cloud.storage.BaseStorageService
-import org.sunbird.cloud.storage.conf.AppConf
-import org.sunbird.cloud.storage.factory.{StorageConfig, StorageServiceFactory}
+import org.sunbird.cloud.storage.factory.StorageServiceFactory
import scala.collection.mutable.Map
+import org.ekstep.analytics.framework.util.HadoopFileUtil
+import org.apache.spark.util.LongAccumulator
+import org.ekstep.analytics.framework.conf.AppConf
+import org.ekstep.analytics.framework.fetcher.{AkkaHttpClient, AkkaHttpUtil, DruidDataFetcher}
class FrameworkContext {
var dc: DruidClient = null;
+ var drc: DruidClient = null;
var storageContainers: Map[String, BaseStorageService] = Map();
+ val fileUtil = new HadoopFileUtil();
+
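+ // Spark accumulators for tracking the number of input and output events processed by a job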
+ var inputEventsCount: LongAccumulator = _
+ var outputEventsCount: LongAccumulator = _
def initialize(storageServices: Option[Array[(String, String, String)]]) {
dc = DruidConfig.DefaultConfig.client;
@@ -26,15 +36,23 @@ class FrameworkContext {
getStorageService(storageType, storageType, storageType);
}
+ def getHadoopFileUtil(): HadoopFileUtil = {
+ return fileUtil;
+ }
+
def getStorageService(storageType: String, storageKey: String, storageSecret: String): BaseStorageService = {
+ if("local".equals(storageType)) {
+ return null;
+ }
if (!storageContainers.contains(storageType + "|" + storageKey)) {
- storageContainers.put(storageType, StorageServiceFactory.getStorageService(StorageConfig(storageType, AppConf.getStorageKey(storageKey), AppConf.getStorageSecret(storageSecret))));
+ storageContainers.put(storageType + "|" + storageKey, StorageServiceFactory.getStorageService(org.sunbird.cloud.storage.factory.StorageConfig(storageType, AppConf.getConfig(storageKey), AppConf.getConfig(storageSecret))));
}
- storageContainers.get(storageType).get
+ storageContainers.get(storageType + "|" + storageKey).get
}
- def setDruidClient(druidClient: DruidClient) {
+ def setDruidClient(druidClient: DruidClient, druidRollupClient: DruidClient) {
dc = druidClient;
+ drc = druidRollupClient;
}
def getDruidClient(): DruidClient = {
@@ -44,19 +62,49 @@ class FrameworkContext {
return dc;
}
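+ // Lazily creates a Druid client for the rollup cluster configured via druid.rollup.host and druid.rollup.port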
+ def getDruidRollUpClient(): DruidClient = {
+ if (null == drc) {
+ val conf = DruidConfig.DefaultConfig
+ drc = DruidConfig.apply(
+ Seq(QueryHost(AppConf.getConfig("druid.rollup.host"), AppConf.getConfig("druid.rollup.port").toInt)),
+ conf.secure,
+ conf.url,conf.healthEndpoint,conf.datasource,conf.responseParsingTimeout,conf.clientBackend,
+ conf.clientConfig,conf.scanQueryLegacyMode,conf.zoneId,conf.system).client
+ }
+ return drc;
+ }
+
+ def getAkkaHttpUtil(): AkkaHttpClient = {
+ AkkaHttpUtil
+ }
+
def shutdownDruidClient() = {
if (dc != null) dc.actorSystem.terminate()
}
+ def shutdownDruidRollUpClient() = {
+ if (drc != null) drc.actorSystem.terminate()
+ }
+
def shutdownStorageService() = {
- if (null != storageContainers) {
+ if (storageContainers.nonEmpty) {
storageContainers.foreach(f => f._2.closeContext());
}
}
def closeContext() = {
shutdownDruidClient();
+ shutdownDruidRollUpClient();
shutdownStorageService();
}
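+ // Reads a DataFrame using the given format (passed as "url") and options, applying the schema only when it is non-empty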
+ def loadData(spark: SparkSession, settings: scala.collection.Map[String, String], url: String, schema: StructType): DataFrame = {
+ if (schema.nonEmpty) {
+ spark.read.schema(schema).format(url).options(settings).load()
+ }
+ else {
+ spark.read.format(url).options(settings).load()
+ }
+ }
+
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala
index bf78b54b..acb6011a 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala
@@ -63,7 +63,7 @@ class ProfileEvent(val eid: String, val ts: String, val `@timestamp`: String, va
case class UserProfile(uid: String, gender: String, age: Int)
// Analytics Framework Job Models
-case class Query(bucket: Option[String] = None, prefix: Option[String] = None, startDate: Option[String] = None, endDate: Option[String] = None, delta: Option[Int] = None, brokerList: Option[String] = None, topic: Option[String] = None, windowType: Option[String] = None, windowDuration: Option[Int] = None, file: Option[String] = None, excludePrefix: Option[String] = None, datePattern: Option[String] = None, folder: Option[String] = None, creationDate: Option[String] = None)
+case class Query(bucket: Option[String] = None, prefix: Option[String] = None, startDate: Option[String] = None, endDate: Option[String] = None, delta: Option[Int] = None, brokerList: Option[String] = None, topic: Option[String] = None, windowType: Option[String] = None, windowDuration: Option[Int] = None, file: Option[String] = None, excludePrefix: Option[String] = None, datePattern: Option[String] = None, folder: Option[String] = None, creationDate: Option[String] = None, partitions: Option[List[Int]] = None)
@scala.beans.BeanInfo
case class Filter(name: String, operator: String, value: Option[AnyRef] = None)
@scala.beans.BeanInfo
@@ -77,11 +77,20 @@ case class JobConfig(search: Fetcher, filters: Option[Array[Filter]], sort: Opti
//Druid Query Models
@scala.beans.BeanInfo
-case class DruidQueryModel(queryType: String, dataSource: String, intervals: String, granularity: Option[String] = Option("all"), aggregations: Option[List[Aggregation]] = Option(List(Aggregation(Option("count"), "count", "count"))), dimensions: Option[List[DruidDimension]] = None, filters: Option[List[DruidFilter]] = None, having: Option[DruidHavingFilter] = None, postAggregation: Option[List[PostAggregation]] = None, threshold: Option[Long] = None, metric: Option[String] = None, descending: Option[String] = Option("false"))
+case class DruidQueryModel(queryType: String, dataSource: String, intervals: String, granularity: Option[String] = Option("all"), aggregations: Option[List[Aggregation]] = Option(List(Aggregation(Option("count"), "count", "count"))), dimensions: Option[List[DruidDimension]] = None, filters: Option[List[DruidFilter]] = None, having: Option[DruidHavingFilter] = None, postAggregation: Option[List[PostAggregation]] = None, columns: Option[List[String]] = None, sqlDimensions: Option[List[DruidSQLDimension]] = None, threshold: Option[Long] = None, metric: Option[String] = None, descending: Option[String] = Option("false"), intervalSlider: Int = 0)
+
+@scala.beans.BeanInfo
+case class DruidSQLQuery(query: String, resultFormat: String = "objectLines", header: Boolean = true)
+
@scala.beans.BeanInfo
-case class DruidDimension(fieldName: String, aliasName: Option[String])
+case class DruidSQLDimension(fieldName: String, function: Option[String])
+
@scala.beans.BeanInfo
-case class Aggregation(name: Option[String], `type`: String, fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None)
+case class DruidDimension(fieldName: String, aliasName: Option[String], `type`: Option[String] = Option("Default"), outputType: Option[String] = None, extractionFn: Option[List[ExtractFn]] = None)
+@scala.beans.BeanInfo
+case class ExtractFn(`type`: String, fn: String, retainMissingValue: Option[Boolean] = Option(false), replaceMissingValueWith: Option[String] = None)
+@scala.beans.BeanInfo
+case class Aggregation(name: Option[String], `type`: String, fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None, lgK: Option[Int] = Option(12), tgtHllType: Option[String] = Option("HLL_4"), round: Option[Boolean] = None, filterAggType: Option[String] = None, filterFieldName: Option[String] = None, filterValue: Option[AnyRef] = None)
@scala.beans.BeanInfo
case class PostAggregation(`type`: String, name: String, fields: PostAggregationFields, fn: String, ordering: Option[String] = None)
// only right field can have type as FieldAccess or Constant. Only if it Constant, need to specify "rightFieldType"
@@ -242,4 +251,21 @@ case class DeviceProfileOutput(device_id: String, first_access: Option[Timestamp
state_custom: Option[String], state_code_custom: Option[String], district_custom: Option[String],
fcm_token: Option[String], producer_id: Option[String], user_declared_state: Option[String],
user_declared_district: Option[String], api_last_updated_on: Option[Timestamp], user_declared_on: Option[Timestamp],
- updated_date: Option[Timestamp] = Option(new Timestamp(System.currentTimeMillis()))) extends AlgoOutput
\ No newline at end of file
+ updated_date: Option[Timestamp] = Option(new Timestamp(System.currentTimeMillis()))) extends AlgoOutput
+
+
+case class StorageConfig(store: String, container: String, fileName: String, accountKey: Option[String] = None, secretKey: Option[String] = None);
+
+case class OnDemandJobRequest(request_id: String, request_data: String, download_urls: List[String], status: String)
+
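+// Map-backed wrapper for Druid result rows so they can be passed through the framework as Input/Output types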
+@scala.beans.BeanInfo
+case class DruidOutput(t: Map[String, Any]) extends Map[String,Any] with Input with AlgoInput with AlgoOutput with Output {
+ private val internalMap = t
+ override def +[B1 >: Any](kv: (String, B1)): Map[String, B1] = new DruidOutput(internalMap + kv)
+
+ override def get(key: String): Option[Any] =internalMap.get(key)
+
+ override def iterator: Iterator[(String, Any)] = internalMap.iterator
+
+ override def -(key: String): Map[String, Any] = new DruidOutput(internalMap - key)
+}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala
index 2d6a5305..f68a0bd6 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala
@@ -20,11 +20,6 @@ object OutputDispatcher {
implicit val className = "org.ekstep.analytics.framework.OutputDispatcher";
- @throws(classOf[DispatcherException])
- private def _dispatch(dispatcher: Dispatcher, events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
- DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, events);
- }
-
@throws(classOf[DispatcherException])
def dispatch[T](outputs: Option[Array[Dispatcher]], events: RDD[T])(implicit sc: SparkContext, fc: FrameworkContext): Long = {
@@ -36,8 +31,7 @@ object OutputDispatcher {
JobLogger.log("Dispatching output", Option(dispatcher.to));
DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, eventArr);
}
- events.count;
-
+ 0
}
@throws(classOf[DispatcherException])
@@ -50,20 +44,16 @@ object OutputDispatcher {
DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, eventArr);
events.count;
}
-
+
@throws(classOf[DispatcherException])
- def dispatch[T](dispatcher: Dispatcher, events: Array[String])(implicit fc: FrameworkContext) = {
+ def dispatch[T](config: StorageConfig, events: RDD[T])(implicit sc: SparkContext, fc: FrameworkContext): Long = {
- if (null == dispatcher) {
- throw new DispatcherException("No output configurations found");
- }
- if (events.length != 0) {
- JobLogger.log("Dispatching output", Option(dispatcher.to));
- DispatcherFactory.getDispatcher(dispatcher).dispatch(events, dispatcher.params);
- } else {
- JobLogger.log("No events produced");
- null;
+ if (null == config) {
+ throw new DispatcherException("No configuration found");
}
+ val eventArr = stringify(events);
+ DispatcherFactory.getDispatcher(config).dispatch(eventArr, config);
+ events.count;
}
def stringify[T](events: RDD[T]): RDD[String] = {
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala
new file mode 100644
index 00000000..9feede13
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala
@@ -0,0 +1,18 @@
+package org.ekstep.analytics.framework
+
+import org.apache.spark.sql.{Dataset, SparkSession}
+
+
+
+trait ReportOnDemandModel[T] {
+
+ def execute(reportParams: Option[Map[String, AnyRef]])(implicit spark: SparkSession, fc: FrameworkContext) : Unit
+
+ def getJobRequest(jobId: String) (implicit sparkSession: SparkSession, fc: FrameworkContext): Dataset[T]
+
+ def updateJobRequest(reportBlobs : Dataset[T]) (implicit sparkSession: SparkSession, fc: FrameworkContext): Unit
+
+ def name() : String = "OnDemandReportModel";
+
+
+}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala
new file mode 100644
index 00000000..b5d59191
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala
@@ -0,0 +1,113 @@
+package org.ekstep.analytics.framework
+
+import java.sql.DriverManager
+import java.util.{Date, Properties}
+
+import org.apache.spark.sql._
+import org.ekstep.analytics.framework.conf.AppConf
+import org.ekstep.analytics.framework.util.CommonUtil
+
+
+trait ReportOnDemandModelTemplate[A <: AnyRef, B <: AnyRef] extends ReportOnDemandModel[OnDemandJobRequest] {
+
+ val connProperties: Properties = CommonUtil.getPostgresConnectionProps()
+ val db: String = AppConf.getConfig("postgres.db")
+ val url: String = AppConf.getConfig("postgres.url") + s"$db"
+ val report_config_table: String = AppConf.getConfig("postgres.table.job_request")
+
+ /**
+ * Default execute flow for an on-demand report. Implementing classes provide:
+ * 1. filterReports
+ * 2. generateReports
+ * 3. saveReports
+ */
+ override def execute(reportParams: Option[Map[String, AnyRef]])(implicit spark: SparkSession, fc: FrameworkContext) = {
+
+ val config = reportParams.getOrElse(Map[String, AnyRef]())
+
+ val reportConfigList = getJobRequest(config.getOrElse("jobId", "").asInstanceOf[String])
+
+ val filteredReports = filterReports(reportConfigList, config)
+
+ val generatedReports = generateReports(filteredReports, config)
+
+ val savedReportsList = saveReports(generatedReports, config)
+
+ updateJobRequest(savedReportsList)
+
+ }
+
+ /**
+ * Fetches the submitted on-demand report requests for the given job id from the job request table
+ *
+ * @param jobId job id of the report
+ * @param spark sparkSession implicit
+ * @param fc framework context
+ * @return
+ */
+ override def getJobRequest(jobId: String)(implicit spark: SparkSession, fc: FrameworkContext): Dataset[OnDemandJobRequest] = {
+
+ val encoder = Encoders.product[OnDemandJobRequest]
+ import org.apache.spark.sql.functions.col
+ val reportConfigsDf = spark.sqlContext.sparkSession.read.jdbc(url, report_config_table, connProperties)
+ .where(col("job_id") === jobId).where(col("status") === "SUBMITTED")
+ .select("request_id", "request_data","download_urls","status")
+ reportConfigsDf.as[OnDemandJobRequest](encoder)
+ }
+
+
+ /**
+ * Saves the list of report download URLs for each request id and marks the request as COMPLETED
+ *
+ * @param reportLocationsDf Dataset with list of report paths per request id
+ * @param spark sparkSession implicit
+ * @param fc framework context implicit
+ * @return
+ */
+ override def updateJobRequest(reportLocationsDf: Dataset[OnDemandJobRequest])(implicit spark: SparkSession, fc: FrameworkContext) = {
+ val connProperties: Properties = CommonUtil.getPostgresConnectionProps()
+ val db: String = AppConf.getConfig("postgres.db")
+ val url: String = AppConf.getConfig("postgres.url") + s"$db"
+ val report_config_table: String = AppConf.getConfig("postgres.table.job_request")
+ val user = connProperties.getProperty("user")
+ val pass = connProperties.getProperty("password")
+ reportLocationsDf.rdd.foreachPartition { rddPartition: Iterator[OnDemandJobRequest] =>
+ val connection = DriverManager.getConnection(url, user, pass)
+ val statement = connection.createStatement()
+ rddPartition.foreach { report =>
+ val url = report.download_urls.mkString(",")
+ val row =
+ s""" UPDATE ${report_config_table} SET download_urls = '{${url}}',
+ |dt_job_completed = '${new Date()}',status = 'COMPLETED' where request_id='${report.request_id}' """.stripMargin
+ statement.addBatch(row)
+ }
+ statement.executeBatch()
+ statement.close()
+ connection.close()
+ }
+
+ }
+
+ /**
+ * Pre-processing steps applied before report generation:
+ * 1. Combine or filter the report configs
+ * 2. Join or fetch data from the required tables
+ */
+ def filterReports(reportConfigs: Dataset[OnDemandJobRequest], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[A]
+
+ /**
+ * Generates the reports.
+ * Input: the filtered dataset of requests to generate reports for
+ * Output: the list of files to be saved per request
+ */
+ def generateReports(reports: Dataset[A], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[B]
+
+ /**
+ * Saves the generated reports:
+ * 1. Save the reports to blob storage
+ * 2. Generate metrics
+ * 3. Return the list of blob locations mapped to each request id
+ */
+ def saveReports(reports: Dataset[B], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[OnDemandJobRequest]
+
+}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala
index b1ba1532..9aca1289 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala
@@ -5,74 +5,50 @@ import scala.concurrent.ExecutionContext.Implicits.global
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.ekstep.analytics.framework.exception.DispatcherException
-import org.ekstep.analytics.framework.util.{CommonUtil, JobLogger}
+import org.ekstep.analytics.framework.util.{ CommonUtil, JobLogger }
import org.sunbird.cloud.storage.conf.AppConf
-import org.sunbird.cloud.storage.factory.{StorageConfig, StorageServiceFactory}
+import org.sunbird.cloud.storage.factory.{ StorageServiceFactory }
import org.ekstep.analytics.framework.Level
import scala.concurrent.Await
import org.ekstep.analytics.framework.FrameworkContext
+import org.apache.hadoop.fs.FileUtil
+import org.apache.hadoop.fs.FileSystem
+import java.net.URI
+import org.apache.hadoop.fs.Path
+import org.ekstep.analytics.framework.util.JSONUtils
+import org.ekstep.analytics.framework.StorageConfig
-object AzureDispatcher extends IDispatcher {
+object AzureDispatcher extends HadoopDispatcher with IDispatcher {
- implicit val className = "org.ekstep.analytics.framework.dispatcher.AzureDispatcher"
+ implicit val className = "org.ekstep.analytics.framework.dispatcher.AzureDispatcher"
- @throws(classOf[DispatcherException])
- def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
- var filePath = config.getOrElse("filePath", null).asInstanceOf[String];
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
- val key = config.getOrElse("key", null).asInstanceOf[String];
- val zip = config.getOrElse("zip", false).asInstanceOf[Boolean];
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
+ override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
- if (null == bucket || null == key) {
- throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure")
- }
- var deleteFile = false;
- if (null == filePath) {
- filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log";
- val fw = new FileWriter(filePath, true);
- events.foreach { x => { fw.write(x + "\n"); } };
- fw.close();
- deleteFile = true;
- }
- val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath;
- val storageService = fc.getStorageService("azure");
- storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None);
- storageService.closeContext();
- if (deleteFile) CommonUtil.deleteFile(filePath);
- if (zip) CommonUtil.deleteFile(finalPath);
- events;
- }
-
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
-
-// dispatch(events.collect(), config);
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
- val key = config.getOrElse("key", null).asInstanceOf[String];
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
+ val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
+ val key = config.getOrElse("key", null).asInstanceOf[String];
- if (null == bucket || null == key) {
- throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure")
- }
- events.saveAsTextFile("wasb://" + bucket + "@" + AppConf.getStorageKey(AppConf.getStorageType()) + ".blob.core.windows.net/" + key);
+ if (null == bucket || null == key) {
+ throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure")
}
- def dispatchDirectory(config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext) = {
- val dirPath = config.getOrElse("dirPath", null).asInstanceOf[String]
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String]
- val key = config.getOrElse("key", null).asInstanceOf[String]
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]
+ val srcFile = CommonUtil.getAzureFile(bucket, "_tmp/" + key);
+ val destFile = CommonUtil.getAzureFile(bucket, key);
- if (null == bucket || null == key || dirPath == null) {
- throw new DispatcherException("'local file path', 'bucket' & 'key' parameters are required to upload directory to azure")
- }
+ dispatchData(srcFile, destFile, sc.hadoopConfiguration, events)
+ }
- val storageService = fc.getStorageService("azure");
- val uploadMsg = storageService.upload(bucket, dirPath, key, Option(true), Option(1), Option(3), None)
- storageService.closeContext();
- JobLogger.log("Successfully Uploaded files", Option(Map("filesUploaded" -> "")), Level.INFO)
- CommonUtil.deleteDirectory(dirPath)
+ override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext) = {
+ val bucket = config.container;
+ val key = config.fileName;
+
+ if (null == bucket || null == key || bucket.isEmpty() || key.isEmpty()) {
+ throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure")
}
+ val srcFile = CommonUtil.getAzureFile(bucket, "_tmp/" + key, config.accountKey.getOrElse("azure_storage_key"));
+ val destFile = CommonUtil.getAzureFile(bucket, key, config.accountKey.getOrElse("azure_storage_key"));
+
+ dispatchData(srcFile, destFile, sc.hadoopConfiguration, events)
+ }
}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala
index c547a502..22e745da 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala
@@ -19,7 +19,7 @@ object ConsoleDispatcher extends IDispatcher {
events;
}
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
+ def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = {
if (config.getOrElse("printEvent", true).asInstanceOf[Boolean]) {
for (event <- events) {
println("Event", event);
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala
index 65dad1de..3c86b680 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala
@@ -20,7 +20,4 @@ object ESDispatcher extends IDispatcher {
events.saveToEs(s"$index/_doc", Map("es.input.json" -> "true"))
}
- override def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
- events
- }
}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala
index 1a3aeed2..f45cefb7 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala
@@ -10,30 +10,36 @@ import java.nio.file.Files
import java.nio.file.Paths
import org.apache.spark.SparkContext
import org.ekstep.analytics.framework.FrameworkContext
+import java.io.File
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.fs.FileUtil
+import org.apache.commons.io.FileUtils
+import org.ekstep.analytics.framework.util.CommonUtil
+import org.ekstep.analytics.framework.StorageConfig
/**
* @author Santhosh
*/
-object FileDispatcher extends IDispatcher {
-
- implicit val className = "org.ekstep.analytics.framework.dispatcher.FileDispatcher";
-
- @throws(classOf[DispatcherException])
- def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
- val filePath = config.getOrElse("file", null).asInstanceOf[String];
- if (null == filePath) {
- throw new DispatcherException("'file' parameter is required to send output to file");
- }
- val dir = filePath.substring(0, filePath.lastIndexOf("/"));
- Files.createDirectories(Paths.get(dir));
- val fw = new FileWriter(filePath, true);
- events.foreach { x => { fw.write(x + "\n"); } };
- fw.close();
- events;
- }
-
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
- dispatch(events.collect(), config);
+object FileDispatcher extends HadoopDispatcher with IDispatcher {
+
+ implicit val className = "org.ekstep.analytics.framework.dispatcher.FileDispatcher";
+
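+ // Writes the RDD to a sibling "_tmp" path first, then merges the part files into the target file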
+ override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = {
+ val filePath = config.getOrElse("file", null).asInstanceOf[String];
+ if (null == filePath) {
+ throw new DispatcherException("'file' parameter is required to send output to file");
}
+ val path = new File(filePath);
+ val index = path.getPath.lastIndexOf(path.getName);
+ val prefix = path.getPath.substring(0, index)
+
+ dispatchData(prefix + "_tmp/" + path.getName, filePath, sc.hadoopConfiguration, events)
+ }
+
+ override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext): Unit = {
+ val file = config.fileName;
+ dispatch(Map[String, AnyRef]("file" -> file), events);
+ }
+
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala
new file mode 100644
index 00000000..bb0bbfdd
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala
@@ -0,0 +1,28 @@
+package org.ekstep.analytics.framework.dispatcher
+
+import org.ekstep.analytics.framework.exception.DispatcherException
+import java.io.FileWriter
+import org.ekstep.analytics.framework.OutputDispatcher
+import org.apache.spark.rdd.RDD
+import org.ekstep.analytics.framework.util.JobLogger
+import org.apache.commons.lang3.StringUtils
+import java.nio.file.Files
+import java.nio.file.Paths
+import org.apache.spark.SparkContext
+import org.ekstep.analytics.framework.FrameworkContext
+import org.apache.hadoop.conf.Configuration
+
+/**
+ * @author Santhosh
+ */
+trait HadoopDispatcher {
+
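+ // Deletes any previous temp/destination files, writes the RDD to the temp path,
+ // then merges the part files into a single destination file via copyMerge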
+ def dispatchData(srcFile: String, destFile: String, conf: Configuration, events: RDD[String])(implicit fc: FrameworkContext) = {
+
+ val fileUtil = fc.getHadoopFileUtil();
+ fileUtil.delete(conf, srcFile, destFile);
+ events.saveAsTextFile(srcFile);
+ fileUtil.copyMerge(srcFile, destFile, conf, true);
+ }
+
+}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala
index b041dff4..e68ac504 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala
@@ -4,6 +4,7 @@ import org.ekstep.analytics.framework.exception.DispatcherException
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.ekstep.analytics.framework.FrameworkContext
+import org.ekstep.analytics.framework.StorageConfig
/**
* @author Santhosh
@@ -11,9 +12,11 @@ import org.ekstep.analytics.framework.FrameworkContext
trait IDispatcher {
@throws(classOf[DispatcherException])
- def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext) : Array[String];
+ def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) : Unit;
@throws(classOf[DispatcherException])
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext);
+ def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext) : Unit = {
+ throw new DispatcherException("Not supported method");
+ }
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala
index a31544de..094a99db 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala
@@ -28,19 +28,23 @@ object KafkaDispatcher extends IDispatcher {
def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
val brokerList = config.getOrElse("brokerList", null).asInstanceOf[String];
val topic = config.getOrElse("topic", null).asInstanceOf[String];
+ val batchSize = config.getOrElse("batchSize", 100).asInstanceOf[Integer];
+ val lingerMs = config.getOrElse("lingerMs", 10).asInstanceOf[Integer];
if (null == brokerList) {
throw new DispatcherException("brokerList parameter is required to send output to kafka")
}
if (null == topic) {
throw new DispatcherException("topic parameter is required to send output to kafka")
}
- KafkaEventProducer.sendEvents(events, topic, brokerList)
+ KafkaEventProducer.sendEvents(events, topic, brokerList, batchSize, lingerMs)
events
}
def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
val brokerList = config.getOrElse("brokerList", null).asInstanceOf[String]
val topic = config.getOrElse("topic", null).asInstanceOf[String]
+ val batchSize = config.getOrElse("batchSize", 100).asInstanceOf[Integer];
+ val lingerMs = config.getOrElse("lingerMs", 10).asInstanceOf[Integer];
if (null == brokerList) {
throw new DispatcherException("brokerList parameter is required to send output to kafka")
}
@@ -49,7 +53,7 @@ object KafkaDispatcher extends IDispatcher {
}
events.foreachPartition((partitions: Iterator[String]) => {
- val kafkaSink = KafkaSink(_getKafkaProducerConfig(brokerList));
+ val kafkaSink = KafkaSink(_getKafkaProducerConfig(brokerList, batchSize, lingerMs));
partitions.foreach { message =>
try {
kafkaSink.send(topic, message, new Callback {
@@ -76,12 +80,15 @@ object KafkaDispatcher extends IDispatcher {
}
- private def _getKafkaProducerConfig(brokerList: String): HashMap[String, Object] = {
+ private def _getKafkaProducerConfig(brokerList: String, batchSize: Integer, lingerMs: Integer): HashMap[String, Object] = {
val props = new HashMap[String, Object]()
- props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 3000L.asInstanceOf[Long])
+ props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
+ props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 60000.asInstanceOf[Integer]);
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
+ props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy")
+ props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs)
props
}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala
index 2d8e1e9c..dfb75234 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala
@@ -8,57 +8,37 @@ import org.ekstep.analytics.framework.Level._
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.sunbird.cloud.storage.factory.StorageServiceFactory
-import org.sunbird.cloud.storage.factory.StorageConfig
import org.sunbird.cloud.storage.conf.AppConf
import org.ekstep.analytics.framework.FrameworkContext
+import org.ekstep.analytics.framework.StorageConfig
/**
* @author Santhosh
*/
-object S3Dispatcher extends IDispatcher {
+object S3Dispatcher extends HadoopDispatcher with IDispatcher {
- implicit val className = "org.ekstep.analytics.framework.dispatcher.S3Dispatcher"
+ implicit val className = "org.ekstep.analytics.framework.dispatcher.S3Dispatcher"
- @throws(classOf[DispatcherException])
- def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
- var filePath = config.getOrElse("filePath", null).asInstanceOf[String];
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
- val key = config.getOrElse("key", null).asInstanceOf[String];
- val zip = config.getOrElse("zip", false).asInstanceOf[Boolean];
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
+ override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = {
- if (null == bucket || null == key) {
- throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3")
- }
- var deleteFile = false;
- if (null == filePath) {
- filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log";
- val fw = new FileWriter(filePath, true);
- events.foreach { x => { fw.write(x + "\n"); } };
- fw.close();
- deleteFile = true;
- }
- val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath;
- Console.println(bucket, finalPath, key, Option(isPublic))
- Console.println("FC class type", fc.getClass.getTypeName);
- val storageService = fc.getStorageService("aws");
- storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None);
- storageService.closeContext();
- if (deleteFile) CommonUtil.deleteFile(filePath);
- if (zip) CommonUtil.deleteFile(finalPath);
- events;
- }
-
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
-
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
- val key = config.getOrElse("key", null).asInstanceOf[String];
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
+ val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
+ val key = config.getOrElse("key", null).asInstanceOf[String];
+ val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
- if (null == bucket || null == key) {
- throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3")
- }
- events.saveAsTextFile("s3n://" + bucket + "/" + key);
+ if (null == bucket || null == key) {
+ throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3")
}
+ val srcFile = CommonUtil.getS3File(bucket, "_tmp/" + key);
+ val destFile = CommonUtil.getS3File(bucket, key);
+ dispatchData(srcFile, destFile, sc.hadoopConfiguration, events)
+ }
+
+ override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext): Unit = {
+ val bucket = config.container;
+ val key = config.fileName;
+
+ dispatch(Map[String, AnyRef]("bucket" -> bucket, "key" -> key), events);
+ }
+
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala
deleted file mode 100644
index 87da2c40..00000000
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-package org.ekstep.analytics.framework.dispatcher
-
-import java.io.FileWriter
-
-import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
-import org.ekstep.analytics.framework.exception.DispatcherException
-import org.ekstep.analytics.framework.util.CommonUtil
-import org.sunbird.cloud.storage.factory.StorageServiceFactory
-import org.sunbird.cloud.storage.factory.StorageConfig
-import org.sunbird.cloud.storage.conf.AppConf
-import org.ekstep.analytics.framework.FrameworkContext
-
-/**
- * @author Santhosh
- */
-object S3FileDispatcher extends IDispatcher {
-
- implicit val className = "org.ekstep.analytics.framework.dispatcher.S3FileDispatcher"
-
- @throws(classOf[DispatcherException])
- def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = {
- var filePath = config.getOrElse("filePath", null).asInstanceOf[String];
- val bucket = config.getOrElse("bucket", null).asInstanceOf[String];
- val key = config.getOrElse("key", null).asInstanceOf[String];
- val zip = config.getOrElse("zip", false).asInstanceOf[Boolean];
- val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean];
-
- if (null == bucket || null == key) {
- throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3")
- }
- var deleteFile = false;
- if (null == filePath) {
- filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log";
- val fw = new FileWriter(filePath, true);
- events.foreach { x => { fw.write(x + "\n"); } };
- fw.close();
- deleteFile = true;
- }
- val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath;
- val storageService = fc.getStorageService("aws");
- storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None);
- storageService.closeContext();
- if (deleteFile) CommonUtil.deleteFile(filePath);
- if (zip) CommonUtil.deleteFile(finalPath);
- events;
- }
-
- def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = {
- dispatch(events.collect(), config);
- }
-
-}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala
index 738329e5..cc1961ec 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala
@@ -29,9 +29,7 @@ object SlackDispatcher extends IDispatcher {
}
val webhookUrl = AppConf.getConfig("monitor.notification.webhook_url")
- val message = if (hasAttachments.equalsIgnoreCase("true")) {
- SlackMessage(channel, userName, attachments = Some(events.map(JSONUtils.deserialize[Attachments](_))))
- } else SlackMessage(channel, userName, text = Some(events.mkString(",")))
+ val message = SlackMessage(channel, userName, text = Some(events.mkString(",")))
val resp = RestUtil.post[String](webhookUrl, JSONUtils.serialize(message))
events
}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala
index 1c912339..aba8e3a5 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala
@@ -5,6 +5,7 @@ import org.ekstep.analytics.framework.dispatcher._
import org.ekstep.analytics.framework.exception.DispatcherException
import org.ekstep.analytics.framework.util.JobLogger
import org.ekstep.analytics.framework.Level._
+import org.ekstep.analytics.framework.StorageConfig
/**
* @author Santhosh
@@ -14,8 +15,6 @@ object DispatcherFactory {
@throws(classOf[DispatcherException])
def getDispatcher(disp: Dispatcher): IDispatcher = {
disp.to.toLowerCase() match {
- case "s3file" =>
- S3FileDispatcher;
case "s3" =>
S3Dispatcher;
case "kafka" =>
@@ -36,4 +35,18 @@ object DispatcherFactory {
throw new DispatcherException("Unknown output dispatcher destination found");
}
}
+
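+ // Resolves a dispatcher from a StorageConfig store type: s3, local (file) or azure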
+ @throws(classOf[DispatcherException])
+ def getDispatcher(config: StorageConfig): IDispatcher = {
+ config.store.toLowerCase() match {
+ case "s3" =>
+ S3Dispatcher;
+ case "local" =>
+ FileDispatcher;
+ case "azure" =>
+ AzureDispatcher;
+ case _ =>
+ throw new DispatcherException("Unknown output dispatcher destination found");
+ }
+ }
}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala
index 2aa8cf3e..733225b0 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala
@@ -28,7 +28,7 @@ object AzureDataFetcher {
}
private def getKeys(query: Query)(implicit fc: FrameworkContext) : Array[String] = {
- val storageService = fc.getStorageService("azure");
+ val storageService = fc.getStorageService("azure", "azure_storage_key", "azure_storage_secret");
val keys = storageService.searchObjects(getBucket(query.bucket), getPrefix(query.prefix), query.startDate, query.endDate, query.delta, query.datePattern.getOrElse("yyyy-MM-dd"))
storageService.getPaths(getBucket(query.bucket), keys).toArray
}
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala
index 80aac70c..6828a927 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala
@@ -1,218 +1,409 @@
package org.ekstep.analytics.framework.fetcher
+import java.sql.{Connection, DriverManager, ResultSet, Statement}
import java.time.format.DateTimeFormatter
+import java.util.Properties
+import akka.actor.ActorSystem
+import akka.http.scaladsl.Http
+import akka.http.scaladsl.model._
+import akka.stream.ActorMaterializer
+import akka.stream.scaladsl.{Flow, Framing, Keep, Sink, Source}
+import akka.util.ByteString
import ing.wbaa.druid._
-import ing.wbaa.druid.client.DruidHttpClient
import ing.wbaa.druid.definitions._
import ing.wbaa.druid.dql.DSL._
import ing.wbaa.druid.dql.Dim
import ing.wbaa.druid.dql.expressions.{AggregationExpression, FilteringExpression, PostAggregationExpression}
import io.circe.Json
-import org.ekstep.analytics.framework.conf.AppConf
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.ekstep.analytics.framework._
import org.ekstep.analytics.framework.exception.DataFetcherException
-import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils}
-import org.ekstep.analytics.framework.{DruidQueryModel, FrameworkContext, PostAggregationFields}
+import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, ResultAccumulator}
+import org.sunbird.cloud.storage.conf.AppConf
-import scala.concurrent.Await
+import scala.concurrent.{Await, ExecutionContextExecutor, Future}
+
+
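+// Abstraction over HTTP request execution; AkkaHttpUtil is the default implementation backed by akka-http's Http().singleRequest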
+trait AkkaHttpClient {
+ def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse]
+}
+
+object AkkaHttpUtil extends AkkaHttpClient {
+ def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse] ={
+ Http().singleRequest(httpRequest)
+ }
+}
object DruidDataFetcher {
-
- @throws(classOf[DataFetcherException])
- def getDruidData(query: DruidQueryModel)(implicit fc: FrameworkContext): List[String] = {
- val request = getDruidQuery(query)
- val result = executeDruidQuery(request);
- processResult(query, result);
- }
+ @throws(classOf[DataFetcherException])
+ def getDruidData(query: DruidQueryModel, queryAsStream: Boolean = false)(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = {
+ val request = getDruidQuery(query)
+ fc.inputEventsCount = sc.longAccumulator("DruidDataCount")
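+ // Either stream the Druid results or execute the query and parallelize the processed response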
+ if (queryAsStream) {
+ executeQueryAsStream(query, request)
- def getDruidQuery(query: DruidQueryModel): DruidQuery = {
-
- query.queryType.toLowerCase() match {
- case "groupby" => {
- val DQLQuery = DQL
- .from(query.dataSource)
- .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all")))
- .interval(CommonUtil.getIntervalRange(query.intervals))
- .agg(getAggregation(query): _*)
- .groupBy(query.dimensions.get.map(f => Dim(f.fieldName, f.aliasName)): _*)
- if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get)
- if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*)
- if(query.having.nonEmpty) DQLQuery.having(getGroupByHaving(query).get)
- DQLQuery.build()
- }
- case "topn" => {
- val DQLQuery = DQL
- .from(query.dataSource)
- .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all")))
- .interval(CommonUtil.getIntervalRange(query.intervals))
- .topN(Dim(query.dimensions.get.head.fieldName, query.dimensions.get.head.aliasName), query.metric.getOrElse("count"), query.threshold.getOrElse(100).asInstanceOf[Int])
- .agg(getAggregation(query): _*)
- if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get)
- if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*)
- DQLQuery.build()
- }
- case "timeseries" => {
- val DQLQuery = DQL
- .from(query.dataSource)
- .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all")))
- .interval(CommonUtil.getIntervalRange(query.intervals))
- .agg(getAggregation(query): _*)
- if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get)
- if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*)
- DQLQuery.build()
- }
- case _ =>
- throw new DataFetcherException("Unknown druid query type found");
- }
+ } else {
+ val response = executeDruidQuery(query, request)
+ query.queryType.toLowerCase() match {
+ case "timeseries" | "groupby" | "topn"=>
+ sc.parallelize(processResult(query, response.asInstanceOf[DruidResponseTimeseriesImpl].results))
+ case "scan" =>
+ sc.parallelize(processResult (query, response.asInstanceOf[DruidScanResponse].results.flatMap(f => f.events)))
+ }
}
- def executeDruidQuery(query: DruidQuery)(implicit fc: FrameworkContext) : DruidResponse = {
- val response = fc.getDruidClient().doQuery(query);
- val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong
- Await.result(response, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute"))
- }
-
- def processResult(query: DruidQueryModel, result: DruidResponse) : List[String] = {
- if(result.results.length > 0) {
- query.queryType.toLowerCase match {
- case "timeseries" | "groupby" =>
- val series = result.results.map { f =>
- f.result.asObject.get.+:("date", Json.fromString(f.timestamp.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f =>
- if(f._2.isNull)
- (f._1 -> "unknown")
- else if ("String".equalsIgnoreCase(f._2.name))
- (f._1 -> f._2.asString.get)
- else if("Number".equalsIgnoreCase(f._2.name))
- {
- (f._1 -> CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2))
- }
-
- else (f._1 -> f._2)
- }
- }
- series.map(f => JSONUtils.serialize(f))
- case "topn" =>
- val timeMap = Map("date" -> result.results.head.timestamp.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))
- val series = result.results.map(f => f).head.result.asArray.get.map{f =>
- val dataMap = f.asObject.get.toMap.map{f =>
- if(f._2.isNull)
- (f._1 -> "unknown")
- else if ("String".equalsIgnoreCase(f._2.name))
- (f._1 -> f._2.asString.get)
- else if("Number".equalsIgnoreCase(f._2.name))
- (f._1 -> f._2.asNumber.get.toBigDecimal.get)
- else (f._1 -> f._2)
- }
- timeMap ++ dataMap
- }.toList
- series.map(f => JSONUtils.serialize(f))
- }
- }
- else
- List();
- }
-
- def getAggregation(query: DruidQueryModel): List[AggregationExpression] = {
- query.aggregations.getOrElse(List(org.ekstep.analytics.framework.Aggregation(None, "count", "count"))).map{f =>
- val aggType = AggregationType.decode(f.`type`).right.getOrElse(AggregationType.Count)
- getAggregationByType(aggType, f.name, f.fieldName, f.fnAggregate, f.fnCombine, f.fnReset)
- }
+ }
+
+ def getDruidQuery(query: DruidQueryModel): DruidNativeQuery = {
+ val dims = query.dimensions.getOrElse(List())
+ val druidQuery = DQL
+ .from(query.dataSource)
+ .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all")))
+ .interval(getIntervals(query))
+ query.queryType.toLowerCase() match {
+ case "groupby" => {
+ val DQLQuery = druidQuery.agg(getAggregation(query.aggregations): _*)
+ .groupBy(dims.map(f => getDimensionByType(f.`type`, f.fieldName, f.aliasName, f.outputType, f.extractionFn)): _*)
+ if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get)
+ if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*)
+ if (query.having.nonEmpty) DQLQuery.having(getGroupByHaving(query.having).get)
+ DQLQuery.build()
+ }
+ case "topn" => {
+ val DQLQuery = druidQuery.topN(getDimensionByType(dims.head.`type`, dims.head.fieldName,
+ dims.head.aliasName, dims.head.outputType, dims.head.extractionFn),
+ query.metric.getOrElse("count"), query.threshold.getOrElse(100).asInstanceOf[Int])
+ .agg(getAggregation(query.aggregations): _*)
+ if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get)
+ if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*)
+ DQLQuery.build()
+ }
+ case "timeseries" => {
+ val DQLQuery = druidQuery.agg(getAggregation(query.aggregations): _*)
+ if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get)
+ if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*)
+ DQLQuery.build()
+ }
+ case "scan" => {
+ val DQLQuery = druidQuery.scan()
+ if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get)
+ if (query.columns.nonEmpty) DQLQuery.columns(query.columns.get)
+ DQLQuery.batchSize(AppConf.getConfig("druid.scan.batch.size").toInt)
+        DQLQuery.setQueryContextParam("maxQueuedBytes", AppConf.getConfig("druid.scan.batch.bytes"))
+ DQLQuery.build()
+ }
+
+ case _ =>
+ throw new DataFetcherException("Unknown druid query type found");
}
+ }
- def getAggregationByType(aggType: AggregationType, name: Option[String], fieldName: String, fnAggregate: Option[String], fnCombine: Option[String], fnReset: Option[String]): AggregationExpression = {
- aggType match {
- case AggregationType.Count => count as name.getOrElse(s"${AggregationType.Count.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.HyperUnique => dim(fieldName).hyperUnique as name.getOrElse(s"${AggregationType.HyperUnique.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.ThetaSketch => thetaSketch(Dim(fieldName)) as name.getOrElse(s"${AggregationType.ThetaSketch.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.Cardinality => cardinality(Dim(fieldName)) as name.getOrElse(s"${AggregationType.Cardinality.toString.toLowerCase}_${fieldName.toLowerCase()}")
- case AggregationType.LongSum => longSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongSum.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.DoubleSum => doubleSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleSum.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.DoubleMax => doubleMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMax.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.DoubleMin => doubleMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMin.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.LongMax => longMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMax.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.LongMin => longMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMin.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.DoubleFirst => doubleFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.DoubleLast => doubleLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleLast.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.LongLast => longLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongLast.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.LongFirst =>longFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}")
- case AggregationType.Javascript => ing.wbaa.druid.dql.AggregationOps.javascript(name.getOrElse(""), Iterable(fieldName), fnAggregate.get, fnCombine.get, fnReset.get)
+ def getIntervals(query: DruidQueryModel): String = {
+ if (query.granularity.getOrElse("all").toUpperCase == "LATEST_INDEX") {
+      var connection: Connection = null
+      var statement: Statement = null
+      try {
+        val connProperties: Properties = CommonUtil.getPostgresConnectionUserProps(AppConf.getConfig("postgres.druid.user"),
+          AppConf.getConfig("postgres.druid.pass"))
+        val db: String = AppConf.getConfig("postgres.druid.db")
+        val url: String = AppConf.getConfig("postgres.druid.url") + s"$db"
+        val getLatestIndexQuery = s"""select segment.start, segment.end from druid_segments segment where datasource = '${query.dataSource}' and used='t' order by start desc"""
+        connection = DriverManager.getConnection(url, connProperties)
+        statement = connection.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY)
+        val result: ResultSet = statement.executeQuery(getLatestIndexQuery)
+        if (result.first())
+          result.getString("start") + "/" + result.getString("end")
+        else
+          CommonUtil.getIntervalRange(query.intervals, query.dataSource, query.intervalSlider)
+      } finally {
+        // guard the close calls so a failed connection does not mask the real error with an NPE
+        if (statement != null) statement.close()
+        if (connection != null) connection.close()
}
+ } else {
+ CommonUtil.getIntervalRange(query.intervals, query.dataSource, query.intervalSlider)
}
+ }
+
+ def executeDruidQuery(model: DruidQueryModel,query: DruidNativeQuery)(implicit sc: SparkContext, fc: FrameworkContext): DruidResponse = {
+ val response = if(query.dataSource.contains("rollup") || query.dataSource.contains("distinct")
+ || query.dataSource.contains("snapshot")) fc.getDruidRollUpClient().doQuery(query)
+ else fc.getDruidClient().doQuery(query)
+ val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong
+ Await.result(response, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute"))
+
+
+ }
+
+ def getSQLDruidQuery(model : DruidQueryModel) : DruidSQLQuery ={
+    val columns = model.sqlDimensions.get.map({ f =>
+      if (f.function.isEmpty)
+        f.fieldName
+      else
+        f.function.get + " AS \"" + f.fieldName + "\""
+    })
+ val intervals = CommonUtil.getIntervalRange(model.intervals, model.dataSource, model.intervalSlider)
+ val sqlString = "SELECT " + columns.mkString(",") +
+ " from \"druid\".\"" + model.dataSource + "\" where " +
+ "__time >= '" + intervals.split("/").apply(0).split("T").apply(0) + "' AND __time < '"+
+ intervals.split("/").apply(1).split("T").apply(0) + "'"
+
+ DruidSQLQuery(sqlString)
+ }
+
+ def executeQueryAsStream(model: DruidQueryModel, query: DruidNativeQuery)(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = {
+
+ implicit val system = if (query.dataSource.contains("rollup") || query.dataSource.contains("distinct") || query.dataSource.contains("snapshot"))
+ fc.getDruidRollUpClient().actorSystem
+ else
+ fc.getDruidClient().actorSystem
+ implicit val materializer = ActorMaterializer()
+
+ val response =
+ if (query.dataSource.contains("rollup") || query.dataSource.contains("distinct") || query.dataSource.contains("snapshot"))
+ fc.getDruidRollUpClient().doQueryAsStream(query)
+ else
+ fc.getDruidClient().doQueryAsStream(query)
+
+ val druidResult: Future[RDD[String]] =
+ response
+ .via(new ResultAccumulator[BaseResult])
+ .map(f => processResult(model,f))
+ .map(sc.parallelize(_))
+ .toMat(Sink.fold[RDD[String], RDD[String]]((sc.emptyRDD[String]))(_ union _))(Keep.right).run()
- def getFilter(query: DruidQueryModel): Option[FilteringExpression] = {
- if (query.filters.nonEmpty) {
- val filters = query.filters.get.map { f =>
- val values = if (f.values.isEmpty && f.value.isEmpty) List() else if (f.values.isEmpty) List(f.value.get) else f.values.get
- getFilterByType(f.`type`, f.dimension, values)
+ val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong
+ Await.result(druidResult, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute"))
+ }
+
+ def executeSQLQuery(model: DruidQueryModel, client: AkkaHttpClient)(implicit sc: SparkContext, fc: FrameworkContext): RDD[DruidOutput] = {
+
+ val druidQuery = getSQLDruidQuery(model)
+ fc.inputEventsCount = sc.longAccumulator("DruidDataCount")
+ implicit val system = fc.getDruidRollUpClient().actorSystem
+ implicit val materializer = ActorMaterializer()
+ implicit val ec: ExecutionContextExecutor = system.dispatcher
+ val url = String.format("%s://%s:%s%s%s", "http", AppConf.getConfig("druid.rollup.host"),
+ AppConf.getConfig("druid.rollup.port"), AppConf.getConfig("druid.url"), "sql")
+ val request = HttpRequest(method = HttpMethods.POST,
+ uri = url,
+ entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(druidQuery)))
+ val responseFuture: Future[HttpResponse] = client.sendRequest(request)
+
+ val convertStringFlow =
+ Flow[ByteString].map(s => s.utf8String.trim)
+
+ val result = Source.fromFuture[HttpResponse](responseFuture)
+ .flatMapConcat(response => response.entity.withoutSizeLimit()
+ .dataBytes.via(Framing.delimiter(ByteString("\n"),
+ AppConf.getConfig("druid.scan.batch.bytes").toInt, true)))
+ .via(convertStringFlow).via(new ResultAccumulator[String])
+ .map(events => {
+ fc.inputEventsCount.add(events.filter(p=> !p.isEmpty).size)
+ sc.parallelize(events)
+ })
+ .toMat(Sink.fold[RDD[String], RDD[String]]((sc.emptyRDD[String]))(_ union _))(Keep.right).run()
+
+ val data = Await.result(result, scala.concurrent.duration.Duration.
+ apply(AppConf.getConfig("druid.query.wait.time.mins").toLong, "minute"))
+ data.filter(f => !f.isEmpty).map(f=> processSqlResult(f))
+ }
+
+
+ def processResult(query: DruidQueryModel, result: Seq[BaseResult])(implicit fc: FrameworkContext): Seq[String] = {
+ if (result.nonEmpty) {
+ fc.inputEventsCount.add(result.size)
+ query.queryType.toLowerCase match {
+ case "timeseries" | "groupby" =>
+ val series = result.asInstanceOf[List[DruidResult]].map { f =>
+ f.result.asObject.get.+:("date", Json.fromString(f.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f =>
+ if (f._2.isNull)
+ (f._1 -> "unknown")
+ else if ("String".equalsIgnoreCase(f._2.name))
+ (f._1 -> f._2.asString.get)
+ else if ("Number".equalsIgnoreCase(f._2.name)) {
+ (f._1 -> CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2))
+ } else (f._1 -> f._2)
}
- Option(conjunction(filters: _*))
- }
- else None
+ }
+ series.map(f => JSONUtils.serialize(f))
+ case "topn" =>
+ val timeMap = Map("date" -> result.head.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))
+ val series = result.asInstanceOf[List[DruidResult]].map(f => f).head.result.asArray.get.map { f =>
+ val dataMap = f.asObject.get.toMap.map { f =>
+ if (f._2.isNull)
+ (f._1 -> "unknown")
+ else if ("String".equalsIgnoreCase(f._2.name))
+ (f._1 -> f._2.asString.get)
+ else if ("Number".equalsIgnoreCase(f._2.name))
+ (f._1 -> f._2.asNumber.get.toBigDecimal.get)
+ else (f._1 -> f._2)
+ }
+ timeMap ++ dataMap
+ }.toList
+ series.map(f => JSONUtils.serialize(f))
+ case "scan"=>
+ val series = result.toList.asInstanceOf[List[DruidScanResult]].map { f =>
+ f.result.asObject.get.+:("date", Json.fromString(f.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f =>
+ if (f._2.isNull)
+ (f._1 -> "unknown")
+ else if ("String".equalsIgnoreCase(f._2.name))
+ (f._1 -> f._2.asString.get)
+ else if ("Number".equalsIgnoreCase(f._2.name)) {
+ (f._1 -> CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2))
+ } else {
+ (f._1 -> JSONUtils.deserialize[Map[String,Any]](JSONUtils.serialize(f._2)).get("value").get)
+ }
+ }
+ }
+ series.map(f => JSONUtils.serialize(f))
+ }
+ } else
+ List();
+ }
+ def processSqlResult(result: String): DruidOutput = {
+
+    val finalResult = JSONUtils.deserialize[Map[String, Any]](result)
+    val finalMap = finalResult.map(m => {
+      if (m._2 == null)
+        (m._1, "unknown")
+      else if (m._2.isInstanceOf[String])
+        (m._1, if (m._2.toString.isEmpty) "unknown" else m._2)
+      else (m._1, m._2)
+    })
+ DruidOutput(finalMap)
+ }
+
+ def getAggregation(aggregations: Option[List[org.ekstep.analytics.framework.Aggregation]]): List[AggregationExpression] = {
+ aggregations.getOrElse(List(org.ekstep.analytics.framework.Aggregation(None, "count", "count"))).map { f =>
+ val aggType = AggregationType.decode(f.`type`).right.getOrElse(AggregationType.Count)
+ getAggregationByType(aggType, f.name, f.fieldName, f.fnAggregate, f.fnCombine, f.fnReset, f.lgK, f.tgtHllType, f.round, f.filterAggType, f.filterFieldName, f.filterValue)
}
+ }
- def getFilterByType(filterType: String, dimension: String, values: List[AnyRef]): FilteringExpression = {
- filterType.toLowerCase match {
- case "isnull" => Dim(dimension).isNull
- case "isnotnull" => Dim(dimension).isNotNull
- case "equals" => Dim(dimension) === values.head.asInstanceOf[String]
- case "notequals" => Dim(dimension) =!= values.head.asInstanceOf[String]
- case "containsignorecase" => Dim(dimension).containsIgnoreCase(values.head.asInstanceOf[String])
- case "contains" => Dim(dimension).contains(values.head.asInstanceOf[String], true)
- case "in" => Dim(dimension) in values.asInstanceOf[List[String]]
- case "notin" => Dim(dimension) notIn values.asInstanceOf[List[String]]
- case "regex" => Dim(dimension) regex values.head.asInstanceOf[String]
- case "like" => Dim(dimension) like values.head.asInstanceOf[String]
- case "greaterthan" => Dim(dimension).between(values.head.asInstanceOf[Number].doubleValue(), Integer.MAX_VALUE, true, false)
- case "lessthan" => Dim(dimension).between(0, values.head.asInstanceOf[Number].doubleValue(), false, true)
- }
+ def getAggregationByType(aggType: AggregationType, name: Option[String], fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None, lgk: Option[Int] = None, tgtHllType: Option[String] = None, round: Option[Boolean] = None, filterAggType: Option[String] = None, filterFieldName: Option[String] = None, filterValue: Option[AnyRef] = None): AggregationExpression = {
+ aggType match {
+ case AggregationType.Count => count as name.getOrElse(s"${AggregationType.Count.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.HyperUnique => dim(fieldName).hyperUnique as name.getOrElse(s"${AggregationType.HyperUnique.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.ThetaSketch => thetaSketch(Dim(fieldName)) as name.getOrElse(s"${AggregationType.ThetaSketch.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.Cardinality => cardinality(Dim(fieldName)) as name.getOrElse(s"${AggregationType.Cardinality.toString.toLowerCase}_${fieldName.toLowerCase()}")
+ case AggregationType.LongSum => longSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongSum.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.DoubleSum => doubleSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleSum.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.DoubleMax => doubleMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMax.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.DoubleMin => doubleMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMin.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.LongMax => longMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMax.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.LongMin => longMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMin.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.DoubleFirst => doubleFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.DoubleLast => doubleLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleLast.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.LongLast => longLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongLast.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.LongFirst => longFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}")
+ case AggregationType.Javascript => ing.wbaa.druid.dql.AggregationOps.javascript(name.getOrElse(""), Iterable(fieldName), fnAggregate.get, fnCombine.get, fnReset.get)
+ case AggregationType.HLLSketchMerge => ing.wbaa.druid.dql.AggregationOps.hllAggregator(fieldName, name.getOrElse(s"${AggregationType.HLLSketchMerge.toString.toLowerCase()}_${fieldName.toLowerCase()}"), lgk.getOrElse(12), tgtHllType.getOrElse("HLL_4"), round.getOrElse(true))
+ case AggregationType.Filtered => getFilteredAggregationByType(filterAggType, name, fieldName, filterFieldName, filterValue)
+ // case _ => throw new Exception("Unsupported aggregation type")
}
+ }
- def getPostAggregation(query: DruidQueryModel): Option[List[PostAggregationExpression]] = {
- if (query.postAggregation.nonEmpty) {
- Option(query.postAggregation.get.map { f =>
- PostAggregationType.decode(f.`type`) match {
- case Right(x) => getPostAggregationByType(x, f.name, f.fields, f.fn)
- case Left(x) => throw x
- }
- })
- }
- else None
+ def getFilteredAggregationByType(aggType: Option[String], name: Option[String], fieldName: String, filterFieldName: Option[String], filterValue: Option[AnyRef]): AggregationExpression = {
+    if (aggType.nonEmpty && filterFieldName.nonEmpty && filterValue.nonEmpty)
+      ing.wbaa.druid.dql.AggregationOps.selectorFiltered(filterFieldName.get, getAggregationByType(AggregationType.decode(aggType.get).right.get, name, fieldName), filterValue.get.toString)
+ else
+ throw new DataFetcherException("Missing fields for filter type aggregation");
+ }
+
+ def getFilter(filters: Option[List[DruidFilter]]): Option[FilteringExpression] = {
+
+ if (filters.nonEmpty) {
+ val filterExprs = filters.get.map { f =>
+ val values = if (f.values.isEmpty && f.value.isEmpty) List() else if (f.values.isEmpty) List(f.value.get) else f.values.get
+ getFilterByType(f.`type`, f.dimension, values)
+ }
+ Option(conjunction(filterExprs: _*))
+ } else None
+
+ }
+
+ def getFilterByType(filterType: String, dimension: String, values: List[AnyRef]): FilteringExpression = {
+ filterType.toLowerCase match {
+ case "isnull" => Dim(dimension).isNull
+ case "isnotnull" => Dim(dimension).isNotNull
+ case "equals" => Dim(dimension) === values.head.asInstanceOf[String]
+ case "notequals" => Dim(dimension) =!= values.head.asInstanceOf[String]
+ case "containsignorecase" => Dim(dimension).containsIgnoreCase(values.head.asInstanceOf[String])
+ case "contains" => Dim(dimension).contains(values.head.asInstanceOf[String], true)
+ case "in" => Dim(dimension) in values.asInstanceOf[List[String]]
+ case "notin" => Dim(dimension) notIn values.asInstanceOf[List[String]]
+ case "regex" => Dim(dimension) regex values.head.asInstanceOf[String]
+ case "like" => Dim(dimension) like values.head.asInstanceOf[String]
+ case "greaterthan" => Dim(dimension).between(values.head.asInstanceOf[Number].doubleValue(), Integer.MAX_VALUE, true, false)
+ case "lessthan" => Dim(dimension).between(0, values.head.asInstanceOf[Number].doubleValue(), false, true)
+ case _ => throw new Exception("Unsupported filter type")
}
+ }
+
+ def getPostAggregation(postAggregation: Option[List[org.ekstep.analytics.framework.PostAggregation]]): Option[List[PostAggregationExpression]] = {
+ if (postAggregation.nonEmpty) {
+ Option(postAggregation.get.map { f =>
+ PostAggregationType.decode(f.`type`) match {
+ case Right(x) => getPostAggregationByType(x, f.name, f.fields, f.fn)
+ case Left(x) => throw x
+ }
+ })
+ } else None
+ }
- def getPostAggregationByType(postAggType: PostAggregationType, name: String, fields: PostAggregationFields, fn: String): PostAggregationExpression = {
- postAggType match {
- case PostAggregationType.Arithmetic =>
- fn match {
- // only right field can have type as Constant or FieldAccess
- case "+" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).+(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).+(Dim(fields.rightField.asInstanceOf[String])) as name
- case "-" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).-(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).-(Dim(fields.rightField.asInstanceOf[String])) as name
- case "*" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).*(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).*(Dim(fields.rightField.asInstanceOf[String])) as name
- case "/" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField)./(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField)./(Dim(fields.rightField.asInstanceOf[String])) as name
- }
- case PostAggregationType.Javascript => javascript(name, Seq(Dim(fields.leftField),Dim(fields.rightField.asInstanceOf[String])), fn)
+ def getPostAggregationByType(postAggType: PostAggregationType, name: String, fields: PostAggregationFields, fn: String): PostAggregationExpression = {
+ postAggType match {
+ case PostAggregationType.Arithmetic =>
+ fn match {
+ // only right field can have type as Constant or FieldAccess
+ case "+" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).+(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).+(Dim(fields.rightField.asInstanceOf[String])) as name
+ case "-" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).-(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).-(Dim(fields.rightField.asInstanceOf[String])) as name
+ case "*" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).*(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).*(Dim(fields.rightField.asInstanceOf[String])) as name
+ case "/" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField)./(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField)./(Dim(fields.rightField.asInstanceOf[String])) as name
}
+ case PostAggregationType.Javascript =>
+ if(fields.rightField.asInstanceOf[String].isEmpty) javascript(name, Seq(Dim(fields.leftField)), fn)
+ else javascript(name, Seq(Dim(fields.leftField), Dim(fields.rightField.asInstanceOf[String])), fn)
+ case _ => throw new Exception("Unsupported post aggregation type")
}
+ }
- def getGroupByHaving(query: DruidQueryModel): Option[FilteringExpression] = {
+ def getGroupByHaving(having: Option[DruidHavingFilter]): Option[FilteringExpression] = {
- if (query.having.nonEmpty) {
- HavingType.decode(query.having.get.`type`) match {
- case Right(x) => Option(getGroupByHavingByType(x, query.having.get.aggregation, query.having.get.value))
- case Left(x) => throw x
- }
- }
- else None
+ if (having.nonEmpty) {
+ HavingType.decode(having.get.`type`) match {
+ case Right(x) => Option(getGroupByHavingByType(x, having.get.aggregation, having.get.value))
+ case Left(x) => throw x
+ }
+ } else None
+ }
+
+ def getGroupByHavingByType(postAggType: HavingType, field: String, value: AnyRef): FilteringExpression = {
+ postAggType match {
+ case HavingType.EqualTo => Dim(field) === value.asInstanceOf[String]
+ case HavingType.Not => Dim(field) =!= value.asInstanceOf[String]
+ case HavingType.GreaterThan => Dim(field) > value.asInstanceOf[Number].doubleValue()
+ case HavingType.LessThan => Dim(field) < value.asInstanceOf[Number].doubleValue()
+ case _ => throw new Exception("Unsupported group by having type")
}
+ }
- def getGroupByHavingByType(postAggType: HavingType, field: String, value: AnyRef): FilteringExpression = {
- postAggType match {
- case HavingType.EqualTo => Dim(field) === value.asInstanceOf[String]
- case HavingType.Not => Dim(field) =!= value.asInstanceOf[String]
- case HavingType.GreaterThan => Dim(field) > value.asInstanceOf[Number].doubleValue()
- case HavingType.LessThan => Dim(field) < value.asInstanceOf[Number].doubleValue()
- }
+ def getDimensionByType(`type`: Option[String], fieldName: String, aliasName: Option[String], outputType: Option[String] = None, extractionFn: Option[List[ExtractFn]] = None): Dim = {
+ `type`.getOrElse("default").toLowerCase match {
+ case "default" => Dim(fieldName, aliasName)
+ case "extraction" => Dim(fieldName,aliasName,outputType).extract(getExtractionFn(extractionFn.get.head))
+ case "cascade" => Dim(fieldName, aliasName, outputType).extract(CascadeExtractionFn(Seq(extractionFn.get.map(f => getExtractionFn(f)): _*)))
}
-}
+ }
+ def getExtractionFn(extractionFunc: ExtractFn): ExtractionFn = {
+ extractionFunc.`type`.toLowerCase match {
+ case "javascript" => JavascriptExtractionFn(extractionFunc.fn).asInstanceOf[ExtractionFn]
+ case "registeredlookup" => RegisteredLookupExtractionFn(extractionFunc.fn, extractionFunc.retainMissingValue, extractionFunc.replaceMissingValueWith).asInstanceOf[ExtractionFn]
+ }
+ }
+}
\ No newline at end of file
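
Not part of the change set: a minimal sketch of how a job driver might call the reworked `getDruidData` entry point. The JSON field names, the datasource, and the package of `DruidDataFetcher` are assumptions inferred from the code above.

```scala
// Illustrative sketch only; DruidQueryModel's JSON shape and defaults are assumed.
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.ekstep.analytics.framework.{DruidDataFetcher, DruidQueryModel, FrameworkContext}
import org.ekstep.analytics.framework.util.JSONUtils

object DruidFetchSketch {
  def fetch()(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = {
    // A timeseries query over the last day; "telemetry-events" is a made-up datasource.
    val queryJson =
      """{"queryType": "timeseries", "dataSource": "telemetry-events",
        |"intervals": "LastDay", "granularity": "all"}""".stripMargin
    val query = JSONUtils.deserialize[DruidQueryModel](queryJson)
    // queryAsStream = true batches the streamed response and unions it into a single RDD
    DruidDataFetcher.getDruidData(query, queryAsStream = true)
  }
}
```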
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala
index 3b780cb0..9fcf5843 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala
@@ -2,14 +2,15 @@ package org.ekstep.analytics.framework.util
import java.io._
import java.net.URL
+import java.nio.file.Files
import java.nio.file.Paths.get
-import java.nio.file.{Files, Paths, StandardCopyOption}
import java.security.MessageDigest
import java.sql.Timestamp
-import java.util.{Date, Properties}
import java.util.zip.GZIPOutputStream
+import java.util.{Date, Properties}
import ing.wbaa.druid.definitions.{Granularity, GranularityType}
+import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}
import org.ekstep.analytics.framework.Level._
@@ -24,7 +25,6 @@ import org.apache.commons.lang3.StringUtils
import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
import org.joda.time.{DateTime, DateTimeZone, Days, LocalDate, Weeks, Years}
import org.sunbird.cloud.storage.conf.AppConf
-
import scala.util.control.Breaks._
object CommonUtil {
@@ -39,6 +39,7 @@ object CommonUtil {
@transient val dayPeriod: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMMdd").withZone(DateTimeZone.forOffsetHoursMinutes(5, 30));
@transient val monthPeriod: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMM").withZone(DateTimeZone.forOffsetHoursMinutes(5, 30));
@transient val dayPeriodFormat: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC();
+ val offset: Long = DateTimeZone.forID("Asia/Kolkata").getOffset(DateTime.now())
def getParallelization(config: JobConfig): Int = {
@@ -52,7 +53,9 @@ object CommonUtil {
fc;
}
- def getSparkContext(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None, sparkElasticsearchConnectionHost: Option[AnyRef] = None): SparkContext = {
+ def getSparkContext(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None,
+ sparkElasticsearchConnectionHost: Option[AnyRef] = None, sparkRedisConnectionHost: Option[AnyRef] = None,
+ sparkRedisDB: Option[AnyRef] = None, sparkRedisPort: Option[AnyRef] = Option("6379")): SparkContext = {
JobLogger.log("Initializing Spark Context")
val conf = new SparkConf().setAppName(appName).set("spark.default.parallelism", parallelization.toString)
.set("spark.driver.memory", AppConf.getConfig("spark.driver_memory"))
@@ -67,6 +70,7 @@ object CommonUtil {
if (!conf.contains("spark.cassandra.connection.host"))
conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host"))
+ // $COVERAGE-ON$
if (sparkCassandraConnectionHost.nonEmpty) {
conf.set("spark.cassandra.connection.host", sparkCassandraConnectionHost.get.asInstanceOf[String])
@@ -80,7 +84,12 @@ object CommonUtil {
conf.set("es.write.rest.error.handler.log.logger.level", "INFO")
}
- // $COVERAGE-ON$
+ if(sparkRedisConnectionHost.nonEmpty && sparkRedisDB.nonEmpty) {
+ conf.set("spark.redis.host", sparkRedisConnectionHost.get.asInstanceOf[String])
+ conf.set("spark.redis.port", sparkRedisPort.get.asInstanceOf[String])
+ conf.set("spark.redis.db", sparkRedisDB.get.asInstanceOf[String])
+ }
+
val sc = new SparkContext(conf)
setS3Conf(sc)
setAzureConf(sc)
@@ -89,12 +98,17 @@ object CommonUtil {
}
def getSparkSession(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None,
- sparkElasticsearchConnectionHost: Option[AnyRef] = None, readConsistencyLevel: Option[String] = None): SparkSession = {
+ sparkElasticsearchConnectionHost: Option[AnyRef] = None, readConsistencyLevel: Option[String] = None,
+ sparkRedisConnectionHost: Option[AnyRef] = None, sparkRedisDB: Option[AnyRef] = None,
+ sparkRedisPort: Option[AnyRef] = Option("6379")): SparkSession = {
JobLogger.log("Initializing SparkSession")
val conf = new SparkConf().setAppName(appName).set("spark.default.parallelism", parallelization.toString)
.set("spark.driver.memory", AppConf.getConfig("spark.driver_memory"))
.set("spark.memory.fraction", AppConf.getConfig("spark.memory_fraction"))
.set("spark.memory.storageFraction", AppConf.getConfig("spark.storage_fraction"))
+ .set("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions")
+ .set("directJoinSetting", "on")
+
val master = conf.getOption("spark.master")
// $COVERAGE-OFF$ Disabling scoverage as the below code cannot be covered as they depend on environment variables
if (master.isEmpty) {
@@ -103,14 +117,13 @@ object CommonUtil {
}
if (!conf.contains("spark.cassandra.connection.host"))
- conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host"))
- if (embeddedCassandraMode)
- conf.set("spark.cassandra.connection.port", AppConf.getConfig("cassandra.service.embedded.connection.port"))
+ conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host"))
+ // $COVERAGE-ON$
if (sparkCassandraConnectionHost.nonEmpty) {
conf.set("spark.cassandra.connection.host", sparkCassandraConnectionHost.get.asInstanceOf[String])
if (readConsistencyLevel.nonEmpty) {
- conf.set("spark.cassandra.input.consistency.level", readConsistencyLevel.get);
+ conf.set("spark.cassandra.input.consistency.level", readConsistencyLevel.get)
}
println("setting spark.cassandra.connection.host to lp-cassandra", conf.get("spark.cassandra.connection.host"))
}
@@ -121,10 +134,14 @@ object CommonUtil {
conf.set("es.write.rest.error.handler.log.logger.name", "org.ekstep.es.dispatcher")
conf.set("es.write.rest.error.handler.log.logger.level", "INFO")
conf.set("es.write.operation", "upsert")
+ }
+ if(sparkRedisConnectionHost.nonEmpty && sparkRedisDB.nonEmpty) {
+ conf.set("spark.redis.host", sparkRedisConnectionHost.get.asInstanceOf[String])
+ conf.set("spark.redis.port", sparkRedisPort.get.asInstanceOf[String])
+ conf.set("spark.redis.db", sparkRedisDB.get.asInstanceOf[String])
}
- // $COVERAGE-ON$
val sparkSession = SparkSession.builder().appName("sunbird-analytics").config(conf).getOrCreate()
setS3Conf(sparkSession.sparkContext)
setAzureConf(sparkSession.sparkContext)
@@ -132,11 +149,6 @@ object CommonUtil {
sparkSession
}
- private def embeddedCassandraMode(): Boolean = {
- val isEmbedded = AppConf.getConfig("cassandra.service.embedded.enable");
- StringUtils.isNotBlank(isEmbedded) && StringUtils.equalsIgnoreCase("true", isEmbedded);
- }
-
def setS3Conf(sc: SparkContext) = {
JobLogger.log("Configuring S3 AccessKey& SecrateKey to SparkContext")
sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", AppConf.getAwsKey());
@@ -148,6 +160,7 @@ object CommonUtil {
val accKey = AppConf.getStorageSecret("azure")
sc.hadoopConfiguration.set("fs.azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")
sc.hadoopConfiguration.set("fs.azure.account.key." + accName + ".blob.core.windows.net", accKey)
+ sc.hadoopConfiguration.set("fs.azure.account.keyprovider." + accName + ".blob.core.windows.net", "org.apache.hadoop.fs.azure.SimpleKeyProvider")
}
def closeSparkContext()(implicit sc: SparkContext) {
@@ -191,11 +204,6 @@ object CommonUtil {
Files.createDirectories(path);
}
- def copyFile(from: InputStream, path: String, fileName: String) = {
- createDirectory(path);
- Files.copy(from, Paths.get(path + fileName), StandardCopyOption.REPLACE_EXISTING);
- }
-
def deleteFile(file: String) {
JobLogger.log("Deleting file ", Option(file))
val path = get(file);
@@ -280,14 +288,6 @@ object CommonUtil {
if (event.gdata != null) event.gdata.ver else null;
}
- def getGameId(event: V3Event): String = {
- if (event.`object`.isEmpty) null else event.`object`.get.id;
- }
-
- def getGameVersion(event: V3Event): String = {
- if (event.`object`.isEmpty) null else event.`object`.get.ver.getOrElse(null);
- }
-
def getParallelization(config: Option[Map[String, String]]): Int = {
getParallelization(config.getOrElse(Map[String, String]()));
}
@@ -350,26 +350,6 @@ object CommonUtil {
zip.close()
}
- def zipFolder(outFile: String, dir: String) = {
- import java.io.{BufferedInputStream, FileInputStream, FileOutputStream}
- import java.util.zip.{ZipEntry, ZipOutputStream}
-
- val zip = new ZipOutputStream(new FileOutputStream(outFile))
- val files = new File(dir).listFiles();
- files.foreach { file =>
- zip.putNextEntry(new ZipEntry(file.getName.split("/").last))
- val in = new BufferedInputStream(new FileInputStream(file))
- var b = in.read()
- while (b > -1) {
- zip.write(b)
- b = in.read()
- }
- in.close()
- zip.closeEntry()
- }
- zip.close()
- }
-
// zipping nested directories
def zipDir(zipFileName: String, dir: String) {
val dirObj = new File(dir);
@@ -593,32 +573,6 @@ object CommonUtil {
x.toArray;
}
- def getValidTags(event: Any, registeredTags: Array[String]): Array[String] = {
-
- val appTag = if (event.isInstanceOf[DerivedEvent]) {
- event.asInstanceOf[DerivedEvent].etags.get.app
- } else if (event.isInstanceOf[Event]) {
- getETags(event.asInstanceOf[Event]).app
- } else if (event.isInstanceOf[V3Event]) {
- getETags(event.asInstanceOf[V3Event]).app
- } else {
- None
- }
- val dimTag = if (event.isInstanceOf[DerivedEvent]) {
- event.asInstanceOf[DerivedEvent].etags.get.dims
- } else if (event.isInstanceOf[Event]) {
- getETags(event.asInstanceOf[Event]).dims
- } else if (event.isInstanceOf[V3Event]) {
- getETags(event.asInstanceOf[V3Event]).dims
- } else {
- None
- }
- val genieTagFilter = if (appTag.isDefined) appTag.get else List()
- val dimTagFilter = if (dimTag.isDefined) dimTag.get else List()
- val tagFilter = genieTagFilter ++ dimTagFilter
- tagFilter.filter { x => registeredTags.contains(x) }.toArray;
- }
-
def getValidTagsForWorkflow(event: DerivedEvent, registeredTags: Array[String]): Array[String] = {
val tagFilter = if (event.tags != null && !event.tags.isEmpty) { event.tags.get.asInstanceOf[List[String]] } else List()
tagFilter.filter { x => registeredTags.contains(x) }.toArray;
@@ -664,7 +618,7 @@ object CommonUtil {
if (event.isInstanceOf[Event]) {
if (event.asInstanceOf[Event].channel.nonEmpty && StringUtils.isNotBlank(event.asInstanceOf[Event].channel.get)) event.asInstanceOf[Event].channel.get else defaultChannelId
} else if (event.isInstanceOf[V3Event]) {
- if (event.asInstanceOf[V3Event].context.channel.nonEmpty && StringUtils.isNotBlank(event.asInstanceOf[V3Event].context.channel)) event.asInstanceOf[V3Event].context.channel else defaultChannelId
+ if (StringUtils.isNotBlank(event.asInstanceOf[V3Event].context.channel)) event.asInstanceOf[V3Event].context.channel else defaultChannelId
} else if (event.isInstanceOf[DerivedEvent]) {
if (event.asInstanceOf[DerivedEvent].dimensions.channel.nonEmpty) event.asInstanceOf[DerivedEvent].dimensions.channel.get else if (StringUtils.isBlank(event.asInstanceOf[DerivedEvent].channel)) defaultChannelId else event.asInstanceOf[DerivedEvent].channel
} else if (event.isInstanceOf[ProfileEvent]) {
@@ -672,40 +626,6 @@ object CommonUtil {
} else defaultChannelId;
}
- def getETags(event: Event): ETags = {
- if (event.etags.isDefined) {
- event.etags.get;
- } else {
- if (event.tags != null) {
- val tags = event.tags.asInstanceOf[List[Map[String, List[String]]]]
- val genieTags = tags.filter(f => f.contains("genie")).map { x => x.get("genie").get }.flatMap { x => x }
- val partnerTags = tags.filter(f => f.contains("partner")).map { x => x.get("partner").get }.flatMap { x => x }
- val dims = tags.filter(f => f.contains("dims")).map { x => x.get("dims").get }.flatMap { x => x }
- ETags(Option(genieTags), Option(partnerTags), Option(dims))
- } else {
- ETags()
- }
-
- }
- }
-
- def getETags(event: V3Event): ETags = {
- if (event.tags != null && !event.tags.isEmpty) {
- val first = event.tags.apply(0)
- if (first.isInstanceOf[String]) {
- ETags(Option(event.tags.asInstanceOf[List[String]]))
- } else {
- val tags = event.tags.asInstanceOf[List[Map[String, List[String]]]]
- val genieTags = tags.filter(f => f.contains("genie")).map { x => x.get("genie").get }.flatMap { x => x }
- val partnerTags = tags.filter(f => f.contains("partner")).map { x => x.get("partner").get }.flatMap { x => x }
- val dims = tags.filter(f => f.contains("dims")).map { x => x.get("dims").get }.flatMap { x => x }
- ETags(Option(genieTags), Option(partnerTags), Option(dims))
- }
- } else {
- ETags()
- }
- }
-
def dayPeriodToLong(period: Int): Long = {
val p = period.toString()
if (8 == p.length()) {
@@ -725,40 +645,45 @@ object CommonUtil {
}
// parse druid query interval
- def getIntervalRange(period: String): String = {
+ def getIntervalRange(period: String, dataSource: String, intervalSlider: Int = 0): String = {
// LastDay, LastWeek, LastMonth, Last7Days, Last30Days
period match {
- case "LastDay" => getDayRange(1);
+ case "LastDay" => getDayRange(1, dataSource, intervalSlider);
case "LastWeek" => getWeekRange(1);
case "LastMonth" => getMonthRange(1);
- case "Last7Days" => getDayRange(7);
- case "Last30Days" => getDayRange(30);
+ case "Last7Days" => getDayRange(7, dataSource, intervalSlider);
+ case "Last30Days" => getDayRange(30, dataSource, intervalSlider);
case _ => period;
}
}
- def getDayRange(count: Int): String = {
- val endDate = DateTime.now(DateTimeZone.UTC);
- val startDate = endDate.minusDays(count).toString("yyyy-MM-dd");
- startDate + "/" + endDate.toString("yyyy-MM-dd")
+ def getDayRange(count: Int, dataSource: String, intervalSlider: Int): String = {
+ val endDate = if(dataSource.contains("rollup") || dataSource.contains("distinct")) DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().minusDays(intervalSlider) else DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().minusDays(intervalSlider).plus(offset)
+ val startDate = endDate.minusDays(count).toString("yyyy-MM-dd'T'HH:mm:ssZZ");
+ startDate + "/" + endDate.toString("yyyy-MM-dd'T'HH:mm:ssZZ")
}
def getMonthRange(count: Int): String = {
- val currentDate = DateTime.now(DateTimeZone.UTC);
- val startDate = currentDate.minusDays(count * 30).dayOfMonth().withMinimumValue().toString("yyyy-MM-dd");
- val endDate = currentDate.dayOfMonth().withMinimumValue().toString("yyyy-MM-dd");
+ val currentDate = DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().plus(offset);
+ val startDate = currentDate.minusDays(count * 30).dayOfMonth().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ");
+ val endDate = currentDate.dayOfMonth().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ");
startDate + "/" + endDate
}
def getWeekRange(count: Int): String = {
- val currentDate = DateTime.now(DateTimeZone.UTC);
- val startDate = currentDate.minusDays(count * 7).dayOfWeek().withMinimumValue().toString("yyyy-MM-dd")
- val endDate = currentDate.dayOfWeek().withMinimumValue().toString("yyyy-MM-dd");
+ val currentDate = DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().plus(offset);
+ val startDate = currentDate.minusDays(count * 7).dayOfWeek().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ")
+ val endDate = currentDate.dayOfWeek().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ");
startDate + "/" + endDate
}
def getGranularity(value: String): Granularity = {
- GranularityType.decode(value).right.getOrElse(GranularityType.All)
+ value.toLowerCase match {
+ case "latest_index" =>
+ GranularityType.decode("all").right.getOrElse(GranularityType.All)
+ case _ =>
+ GranularityType.decode(value).right.getOrElse(GranularityType.All)
+ }
}
def getMetricEvent(params: Map[String, AnyRef], producerId: String, producerPid: String): V3DerivedEvent = {
@@ -777,9 +702,46 @@ object CommonUtil {
}
def getPostgresConnectionProps(): Properties = {
+ val connProperties = new Properties()
val user = AppConf.getConfig("postgres.user")
val pass = AppConf.getConfig("postgres.pass")
+ connProperties.setProperty("driver", "org.postgresql.Driver")
+ connProperties.setProperty("user", user)
+ connProperties.setProperty("password", pass)
+ connProperties
+ }
+
+ def getS3File(bucket: String, file: String): String = {
+ "s3n://" + bucket + "/" + file;
+ }
+
+ def getS3FileWithoutPrefix(bucket: String, file: String): String = {
+ bucket + "/" + file;
+ }
+
+ def getAzureFile(bucket: String, file: String, storageKey: String = "azure_storage_key"): String = {
+ "wasb://" + bucket + "@" + AppConf.getConfig(storageKey) + ".blob.core.windows.net/" + file;
+ }
+
+ def getAzureFileWithoutPrefix(bucket: String, file: String, storageKey: String = "azure_storage_key"): String = {
+ bucket + "@" + AppConf.getConfig(storageKey) + ".blob.core.windows.net/" + file;
+ }
+
+ def setStorageConf(store: String, accountKey: Option[String], accountSecret: Option[String])(implicit sc: SparkContext): Configuration = {
+ store.toLowerCase() match {
+ case "s3" =>
+ sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", AppConf.getConfig(accountKey.getOrElse("aws_storage_key")));
+ sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", AppConf.getConfig(accountSecret.getOrElse("aws_storage_secret")));
+ case "azure" =>
+ sc.hadoopConfiguration.set("fs.azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")
+ sc.hadoopConfiguration.set("fs.azure.account.key." + AppConf.getConfig(accountKey.getOrElse("azure_storage_key")) + ".blob.core.windows.net", AppConf.getConfig(accountSecret.getOrElse("azure_storage_secret")))
+ case _ =>
+ // Do nothing
+ }
+ sc.hadoopConfiguration
+ }
+  def getPostgresConnectionUserProps(user: String, pass: String): Properties = {
val connProperties = new Properties()
connProperties.setProperty("driver", "org.postgresql.Driver")
connProperties.setProperty("user", user)
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala
new file mode 100644
index 00000000..4006f498
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala
@@ -0,0 +1,86 @@
+package org.ekstep.analytics.framework.util
+
+import java.nio.file.Paths
+
+import org.apache.spark.sql.{Dataset, Row}
+import org.apache.spark.sql.functions.col
+import org.ekstep.analytics.framework.StorageConfig
+
+class DatasetExt(df: Dataset[Row]) {
+
+ private val fileUtil = new HadoopFileUtil();
+
+ private def getTempDir(filePrefix: String, reportId: String): String = {
+ Paths.get(filePrefix, reportId, "/_tmp/").toString()
+ }
+
+ private def getFinalDir(filePrefix: String, reportId: String): String = {
+ Paths.get(filePrefix, reportId).toString();
+ }
+
+ private def filePaths(dims: Seq[String], row: Row, format: String, tempDir: String, finalDir: String): (String, String) = {
+
+ val dimPaths = for(dim <- dims) yield {
+ dim + "=" + row.get(row.fieldIndex(dim))
+ }
+
+ val paths = for(dim <- dims) yield {
+ row.get(row.fieldIndex(dim))
+ }
+
+ (Paths.get(tempDir, dimPaths.mkString("/")).toString(), Paths.get(finalDir, paths.mkString("/")) + "." + format)
+ }
+
+ def saveToBlobStore(storageConfig: StorageConfig, format: String, reportId: String, options: Option[Map[String, String]], partitioningColumns: Option[Seq[String]]): List[String] = {
+
+ val conf = df.sparkSession.sparkContext.hadoopConfiguration;
+
+ val file = storageConfig.store.toLowerCase() match {
+ case "s3" =>
+ CommonUtil.getS3FileWithoutPrefix(storageConfig.container, storageConfig.fileName);
+ case "azure" =>
+ CommonUtil.getAzureFileWithoutPrefix(storageConfig.container, storageConfig.fileName, storageConfig.accountKey.getOrElse("azure_storage_key"))
+ case _ =>
+ storageConfig.fileName
+ }
+
+ val filePrefix = storageConfig.store.toLowerCase() match {
+ case "s3" =>
+ "s3n://"
+ case "azure" =>
+ "wasb://"
+ case _ =>
+ ""
+ }
+
+ val tempDir = getTempDir(file, reportId);
+ val finalDir = getFinalDir(file, reportId);
+
+ val dims = partitioningColumns.getOrElse(Seq());
+
+ fileUtil.delete(conf, filePrefix + tempDir)
+ val opts = options.getOrElse(Map());
+ val files = if(dims.nonEmpty) {
+ val map = df.select(dims.map(f => col(f)):_*).distinct().collect().map(f => filePaths(dims, f, format, tempDir, finalDir)).toMap
+ df.repartition(1).write.format(format).options(opts).partitionBy(dims: _*).save(filePrefix + tempDir);
+ map.foreach(f => {
+ fileUtil.delete(conf, filePrefix + f._2)
+ fileUtil.copyMerge(filePrefix + f._1, filePrefix + f._2, conf, true);
+ })
+ map.map(f => filePrefix + f._2).toList
+ } else {
+ df.repartition(1).write.format(format).options(opts).save(filePrefix + tempDir);
+ fileUtil.delete(conf, filePrefix + finalDir + "." + format)
+ fileUtil.copyMerge(filePrefix + tempDir, filePrefix + finalDir + "." + format, conf, true);
+ List(filePrefix + finalDir + "." + format)
+ }
+ fileUtil.delete(conf, filePrefix + tempDir)
+ files
+ }
+
+}
+
+object DatasetUtil {
+ implicit def extensions(df: Dataset[Row]) = new DatasetExt(df);
+
+}
\ No newline at end of file
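
A hedged usage sketch for the new `saveToBlobStore` extension (not part of the diff); the `StorageConfig` field order and the local-store behaviour are assumptions read off the code above.

```scala
// Review aid only. Assumes StorageConfig is a case class with fields store, container and
// fileName, and that its optional accountKey field defaults to None.
import org.apache.spark.sql.SparkSession
import org.ekstep.analytics.framework.StorageConfig
import org.ekstep.analytics.framework.util.DatasetUtil.extensions

object SaveReportSketch extends App {
  val spark = SparkSession.builder().appName("report-sketch").master("local[*]").getOrCreate()
  import spark.implicits._

  val report = Seq(("2020-08-01", "tn", 10), ("2020-08-01", "ka", 20)).toDF("date", "state", "count")

  // One merged csv per distinct `state` value, written under /tmp/reports/daily-metrics/<state>.csv
  val storage = StorageConfig(store = "local", container = "", fileName = "/tmp/reports")
  val files = report.saveToBlobStore(storage, "csv", "daily-metrics",
    Option(Map("header" -> "true")), Option(Seq("state")))
  files.foreach(println)
}
```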
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala
new file mode 100644
index 00000000..29c64f54
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala
@@ -0,0 +1,69 @@
+package org.ekstep.analytics.framework.util
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
+import org.apache.hadoop.io.IOUtils
+
+import scala.util.Try
+
+class HadoopFileUtil {
+
+ /**
+ * Delete a single file.
+ */
+ def delete(file: String, conf: Configuration) : Boolean = {
+
+ val path = new Path(file);
+ val fileSystem = path.getFileSystem(conf);
+ fileSystem.delete(path, true);
+ }
+
+ def copy(srcFile: String, destFile: String, conf: Configuration) : String = {
+
+ val src = new Path(srcFile);
+ val fileSystem = src.getFileSystem(conf);
+ fileSystem.copyToLocalFile(false, src, new Path(destFile))
+ destFile
+ }
+
+ /**
+ * Delete multiple files. Different file sources (aws, azure etc) can be passed here
+ */
+ def delete(conf: Configuration, files: String*) : Seq[Boolean] = {
+
+ for(file <- files) yield {
+ val path = new Path(file);
+ path.getFileSystem(conf).delete(path, true);
+ }
+
+ }
+
+ /**
+ * Merge a hadoop source folder/file into another file
+ */
+ def copyMerge(srcPath: String, destPath: String, conf: Configuration, deleteSrc: Boolean) {
+
+ val srcFilePath = new Path(srcPath);
+ val destFilePath = new Path(destPath);
+ copyMerge(srcFilePath.getFileSystem(conf), srcFilePath, destFilePath.getFileSystem(conf), destFilePath, deleteSrc, conf)
+ }
+
+ def copyMerge(srcFS: FileSystem, srcDir: Path, dstFS: FileSystem, dstFile: Path,
+ deleteSource: Boolean, conf: Configuration): Boolean = {
+
+ if (srcFS.exists(srcDir) && srcFS.getFileStatus(srcDir).isDirectory) {
+ val outputFile = dstFS.create(dstFile)
+ Try {
+ srcFS.listStatus(srcDir).sortBy(_.getPath.getName)
+ .collect {
+ case status if status.isFile() =>
+ val inputFile = srcFS.open(status.getPath())
+ Try(IOUtils.copyBytes(inputFile, outputFile, conf, false))
+ inputFile.close()
+ }
+ }
+ outputFile.close()
+ if (deleteSource) srcFS.delete(srcDir, true) else true
+ } else false
+ }
+}
\ No newline at end of file
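
A short, illustrative sketch of the merge helper in isolation; the paths are made up.

```scala
// Review aid only; paths are illustrative.
import org.apache.hadoop.conf.Configuration
import org.ekstep.analytics.framework.util.HadoopFileUtil

object CopyMergeSketch extends App {
  val fileUtil = new HadoopFileUtil()
  val conf = new Configuration()

  // Merge the Spark part-files under /tmp/report/_tmp into a single CSV and
  // delete the source directory once the merge succeeds.
  fileUtil.copyMerge("/tmp/report/_tmp", "/tmp/report/summary.csv", conf, true)

  // Clean-up variant: delete accepts several paths at once.
  fileUtil.delete(conf, "/tmp/report/_tmp", "/tmp/report/summary.csv")
}
```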
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala
index b3fe63b7..cd46cab5 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala
@@ -30,16 +30,10 @@ object JSONUtils {
@throws(classOf[Exception])
def deserialize[T: Manifest](value: String): T = mapper.readValue(value, typeReference[T]);
- @throws(classOf[Exception])
- def unescapeJSON(string: String): String = {
- StringEscapeUtils.unescapeJava(string)
- }
-
private[this] def typeReference[T: Manifest] = new TypeReference[T] {
override def getType = typeFromManifest(manifest[T])
}
-
private[this] def typeFromManifest(m: Manifest[_]): Type = {
if (m.typeArguments.isEmpty) { m.runtimeClass }
// $COVERAGE-OFF$Disabling scoverage as this code is impossible to test
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala
index 2c3a2459..02fac8ce 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala
@@ -13,10 +13,13 @@ import org.apache.logging.log4j.core.layout.PatternLayout
import java.nio.charset.Charset
import org.apache.logging.log4j.core.config.AppenderRef
+import org.ekstep.analytics.framework.dispatcher.KafkaDispatcher
import org.joda.time.DateTime
object JobLogger {
+ implicit val fc = new FrameworkContext();
+
def init(jobName: String) = {
System.setProperty("logFilename", jobName.toLowerCase());
val ctx = LogManager.getContext(false).asInstanceOf[LoggerContext];
@@ -29,31 +32,35 @@ object JobLogger {
}
private def info(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) {
- logger(name).info(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "INFO", msg, data, None, pdata_id, pdata_pid)));
+ val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "INFO", msg, data, None, pdata_id, pdata_pid))
+ logEvent(event, name, INFO)
}
private def debug(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) {
- logger(name).debug(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "DEBUG", msg, data, None, pdata_id, pdata_pid)))
+ val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "DEBUG", msg, data, None, pdata_id, pdata_pid))
+ logger(name).debug(event);
}
private def error(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) {
- logger(name).error(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "ERROR", msg, data, None, pdata_id, pdata_pid)));
+ val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "ERROR", msg, data, None, pdata_id, pdata_pid))
+ logEvent(event, name, ERROR)
}
private def warn(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) {
- logger(name).debug(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "WARN", msg, data, None, pdata_id, pdata_pid)))
+ val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "WARN", msg, data, None, pdata_id, pdata_pid))
+ logger(name).debug(event);
}
def start(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) = {
val event = JSONUtils.serialize(getV3JobEvent("JOB_START", "INFO", msg, data, None, pdata_id, pdata_pid));
EventBusUtil.dipatchEvent(event);
- logger(name).info(event);
+ logEvent(event, name, INFO)
}
def end(msg: String, status: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) = {
val event = JSONUtils.serialize(getV3JobEvent("JOB_END", "INFO", msg, data, Option(status), pdata_id, pdata_pid));
EventBusUtil.dipatchEvent(event);
- logger(name).info(event);
+ logEvent(event, name, INFO)
}
def log(msg: String, data: Option[AnyRef] = None, logLevel: Level = DEBUG, name: String = "org.ekstep.analytics")(implicit className: String) = {
@@ -69,6 +76,26 @@ object JobLogger {
}
}
+ def logEvent(event: String, name: String = "org.ekstep.analytics", logLevel: Level = DEBUG) = {
+ if (StringUtils.equalsIgnoreCase(AppConf.getConfig("log.appender.kafka.enable"), "true")) {
+ val brokerList = AppConf.getConfig("log.appender.kafka.broker_host")
+ val topic = AppConf.getConfig("log.appender.kafka.topic")
+ KafkaDispatcher.dispatch(Array(event), Map("brokerList" -> brokerList, "topic" -> topic))
+ }
+ else {
+ logLevel match {
+ case INFO =>
+ logger(name).info(event);
+ case DEBUG =>
+ logger(name).debug(event);
+ case WARN =>
+ logger(name).debug(event);
+ case ERROR =>
+ logger(name).error(event);
+ }
+ }
+ }
+
private def getV3JobEvent(eid: String, level: String, msg: String, data: Option[AnyRef], status: Option[String] = None, pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String): V3DerivedEvent = {
val measures = Map(
"class" -> className,
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala
index 7ff21e89..8f294f15 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala
@@ -8,7 +8,7 @@ import org.ekstep.analytics.framework.Level._
import scala.io.Source
trait HTTPClient {
- def get[T](apiURL: String)(implicit mf: Manifest[T]): T
+ def get[T](apiURL: String, requestHeaders: Option[Map[String, String]] = None)(implicit mf: Manifest[T]): T
def post[T](apiURL: String, body: String, requestHeaders: Option[Map[String, String]] = None)(implicit mf: Manifest[T]): T
def patch[T](apiURL: String, body: String, headers: Option[Map[String,String]] = None)(implicit mf: Manifest[T]): T
def put[T](apiURL:String, body:String,headers:Option[Map[String,String]] = None)(implicit mf:Manifest[T]):T
@@ -41,9 +41,12 @@ object RestUtil extends HTTPClient{
}
}
- def get[T](apiURL: String)(implicit mf: Manifest[T]) = {
+ def get[T](apiURL: String, headers: Option[Map[String,String]] = None)(implicit mf: Manifest[T]) = {
val request = new HttpGet(apiURL);
request.addHeader("user-id", "analytics");
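+    // Attach any caller-supplied headers on top of the default user-id header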
+ headers.getOrElse(Map()).foreach { header =>
+ request.addHeader(header._1, header._2)
+ }
try {
_call(request.asInstanceOf[HttpRequestBase]);
} catch {
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala
new file mode 100644
index 00000000..6746da4e
--- /dev/null
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala
@@ -0,0 +1,56 @@
+package org.ekstep.analytics.framework.util
+
+import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
+import akka.stream.{Attributes, FlowShape, Inlet, Outlet}
+import org.ekstep.analytics.framework.conf.AppConf
+
+import scala.collection.immutable
+
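+/** Akka Streams GraphStage that buffers upstream elements and emits them downstream in batches bounded by the druid.query.batch.buffer config, flushing any remainder when upstream completes. */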
+final class ResultAccumulator[E] extends GraphStage[FlowShape[E, immutable.Seq[E]]] {
+
+ val in = Inlet[E]("ResultAccumulator.in")
+ val out = Outlet[immutable.Seq[E]]("ResultAccumulator.out")
+
+ override def shape = FlowShape.of(in, out)
+
+ override def createLogic(attributes: Attributes) = new GraphStageLogic(shape) {
+
+ private var counter: Int = 0
+ private val buffer = Vector.newBuilder[E]
+
+ setHandlers(in, out, new InHandler with OutHandler {
+
+ override def onPush(): Unit = {
+ val nextElement = grab(in)
+ counter += 1
+
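+        // Keep buffering until the configured batch size is reached, then emit the accumulated batch downstream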
+ if (counter < AppConf.getConfig("druid.query.batch.buffer").toLong) {
+ buffer += nextElement
+ pull(in)
+ } else {
+ val result = buffer.result().toList
+ buffer.clear()
+ buffer += nextElement
+ counter = 0
+ push(out, result)
+ }
+ }
+
+ override def onPull(): Unit = {
+ pull(in)
+ }
+
+ override def onUpstreamFinish(): Unit = {
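+        // Emit whatever remains in the buffer before completing the stage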
+ val result = buffer.result().toList
+ if (result.nonEmpty) {
+ emit(out, result)
+ }
+ completeStage()
+ }
+ })
+
+ override def postStop(): Unit = {
+ buffer.clear()
+ }
+ }
+}
\ No newline at end of file
diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala b/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala
index a9b4d164..a67c0baa 100644
--- a/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala
+++ b/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala
@@ -24,14 +24,17 @@ object KafkaEventProducer {
implicit val className: String = "KafkaEventProducer";
- def init(brokerList: String): KafkaProducer[String, String] = {
+ def init(brokerList: String, batchSize: Integer, lingerMs: Integer): KafkaProducer[String, String] = {
// Zookeeper connection properties
val props = new HashMap[String, Object]()
- props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 3000L.asInstanceOf[Long]);
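+    // Producer tuning: configurable batch size and linger, with a fixed request timeout and snappy compression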
+ props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
+ props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 60000.asInstanceOf[Integer]);
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
+ props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy")
+ props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs)
new KafkaProducer[String, String](props);
}
@@ -40,15 +43,15 @@ object KafkaEventProducer {
producer.close();
}
- def sendEvent(event: AnyRef, topic: String, brokerList: String) = {
- val producer = init(brokerList);
+ def sendEvent(event: AnyRef, topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = {
+ val producer = init(brokerList, batchSize, lingerMs);
val message = new ProducerRecord[String, String](topic, null, JSONUtils.serialize(event));
producer.send(message);
close(producer);
}
- def sendEvents(events: Buffer[AnyRef], topic: String, brokerList: String) = {
- val producer = init(brokerList);
+ def sendEvents(events: Buffer[AnyRef], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = {
+ val producer = init(brokerList, batchSize, lingerMs);
events.foreach { event =>
{
val message = new ProducerRecord[String, String](topic, null, JSONUtils.serialize(event));
@@ -59,8 +62,8 @@ object KafkaEventProducer {
}
@throws(classOf[DispatcherException])
- def sendEvents(events: Array[String], topic: String, brokerList: String) = {
- val producer = init(brokerList);
+ def sendEvents(events: Array[String], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = {
+ val producer = init(brokerList, batchSize, lingerMs);
events.foreach { event =>
{
val message = new ProducerRecord[String, String](topic, event);
@@ -70,8 +73,8 @@ object KafkaEventProducer {
close(producer);
}
- def publishEvents(events: Buffer[String], topic: String, brokerList: String) = {
- val producer = init(brokerList);
+ def publishEvents(events: Buffer[String], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = {
+ val producer = init(brokerList, batchSize, lingerMs);
events.foreach { event =>
{
val message = new ProducerRecord[String, String](topic, null, event);
diff --git a/analytics-core/src/test/resources/application.conf b/analytics-core/src/test/resources/application.conf
index c97bf37c..b45684b3 100644
--- a/analytics-core/src/test/resources/application.conf
+++ b/analytics-core/src/test/resources/application.conf
@@ -26,13 +26,34 @@ druid = {
datasource = "summary-events"
response-parsing-timeout = 300000
}
-druid.query.wait.time.mins=1
+druid.rollup.host="localhost"
+druid.rollup.port=8082
+druid.query.wait.time.mins=5
druid.report.upload.wait.time.mins=1
-
+druid.scan.batch.size=100
+druid.scan.batch.bytes=2000000
+druid.query.batch.buffer=10
spark.memory_fraction=0.3
spark.storage_fraction=0.5
spark.driver_memory=1g
+druid.latestindex.query="select segment.start, segment.end from druid_segments segment where datasource = 'content-model-snapshot' and used='t' order by start"
//postgres configuration
postgres.user="postgres"
postgres.pass="postgres"
+
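+# Postgres connection used for Druid segment metadata queries in tests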
+postgres.druid.db="postgres"
+postgres.druid.url="jdbc:postgresql://localhost:65124/"
+postgres.druid.user="postgres"
+postgres.druid.pass="postgres"
+
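+# Dummy cloud storage credentials used by the unit tests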
+azure_storage_key = azure-test-key
+azure_storage_secret = azure-test-secret
+
+aws_storage_key = aws-test-key
+aws_storage_secret = aws-test-secret
+
+# Joblog Kafka appender config for cluster execution
+log.appender.kafka.enable="false"
+log.appender.kafka.broker_host="localhost:9092"
+log.appender.kafka.topic="telemetry.log"
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala
index 86f12b42..ecd0a14a 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala
@@ -5,6 +5,8 @@ import org.ekstep.analytics.framework.util.JSONUtils
import org.scalamock.scalatest.MockFactory
import org.scalatest.Matchers
import org.sunbird.cloud.storage.BaseStorageService
+import org.ekstep.analytics.framework.fetcher.S3DataFetcher
+import org.ekstep.analytics.framework.fetcher.AzureDataFetcher
/**
* @author Santhosh
@@ -26,6 +28,7 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory {
it should "fetch the events from local file" in {
implicit val fc = new FrameworkContext();
+ fc.inputEventsCount = sc.longAccumulator("Count");
val search = Fetcher("local", None, Option(Array(
Query(None, None, None, None, None, None, None, None, None, Option("src/test/resources/sample_telemetry.log"))
)));
@@ -43,12 +46,19 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory {
)));
val rdd1 = DataFetcher.fetchBatchData[TestDataFetcher](search1);
rdd1.count should be (0)
+
+ val search2 = Fetcher("local", None, Option(Array(
+ Query(None, None, None, None, None, None, None, None, None, None)
+ )));
+ val rdd2 = DataFetcher.fetchBatchData[TestDataFetcher](search2);
+ rdd2.count should be (0)
}
it should "fetch no file from S3 and return an empty RDD" in {
implicit val mockFc = mock[FrameworkContext];
val mockStorageService = mock[BaseStorageService]
+ mockFc.inputEventsCount = sc.longAccumulator("Count");
(mockFc.getStorageService(_:String):BaseStorageService).expects("aws").returns(mockStorageService);
(mockStorageService.searchObjects _).expects("dev-data-store", "abc/", Option("2012-01-01"), Option("2012-02-01"), None, "yyyy-MM-dd").returns(null);
(mockStorageService.getPaths _).expects("dev-data-store", null).returns(List("src/test/resources/sample_telemetry_2.log"))
@@ -78,8 +88,9 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory {
it should "fetch the batch events from azure" in {
implicit val mockFc = mock[FrameworkContext];
+ mockFc.inputEventsCount = sc.longAccumulator("Count");
val mockStorageService = mock[BaseStorageService]
- (mockFc.getStorageService(_:String):BaseStorageService).expects("azure").returns(mockStorageService);
+ (mockFc.getStorageService(_:String, _:String, _:String):BaseStorageService).expects("azure", "azure_storage_key", "azure_storage_secret").returns(mockStorageService);
(mockStorageService.searchObjects _).expects("dev-data-store", "raw/", Option("2017-08-31"), Option("2017-08-31"), None, "yyyy-MM-dd").returns(null);
(mockStorageService.getPaths _).expects("dev-data-store", null).returns(List("src/test/resources/sample_telemetry_2.log"))
val queries = Option(Array(
@@ -92,7 +103,7 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory {
it should "invoke the druid data fetcher" in {
implicit val fc = new FrameworkContext();
- val unknownQuery = DruidQueryModel("scan", "telemetry-events", "LastWeek", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))))
+ val unknownQuery = DruidQueryModel("time", "telemetry-events", "LastWeek", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))))
the[DataFetcherException] thrownBy {
DataFetcher.fetchBatchData[TimeSeriesData](Fetcher("druid", None, None, Option(unknownQuery)));
} should have message "Unknown druid query type found"
@@ -104,4 +115,51 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory {
val rdd = DataFetcher.fetchBatchData[Event](Fetcher("none", None, None));
rdd.isEmpty() should be (true)
}
+
+ it should "cover the missing branches in S3DataFetcher, AzureDataFetcher and DruidDataFetcher" in {
+ implicit val fc = new FrameworkContext();
+ var query = JSONUtils.deserialize[Query]("""{"bucket":"test-container","prefix":"test/","folder":"true","endDate":"2020-01-10"}""")
+ S3DataFetcher.getObjectKeys(Array(query)).head should be ("s3n://test-container/test/2020-01-10")
+ AzureDataFetcher.getObjectKeys(Array(query)).head should be ("wasb://test-container@azure-test-key.blob.core.windows.net/test/2020-01-10")
+
+ query = JSONUtils.deserialize[Query]("""{"bucket":"test-container","prefix":"test/","folder":"true","endDate":"2020-01-10","excludePrefix":"test"}""")
+ S3DataFetcher.getObjectKeys(Array(query)).size should be (0)
+ AzureDataFetcher.getObjectKeys(Array(query)).size should be (0)
+
+ }
+
+
+ it should "check for getFilteredKeys from azure via partitions" in {
+
+ // with single partition
+ val query1 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-10"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0)))
+ val keys1 = DataFetcher.getFilteredKeys(query1, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-1-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0)))
+ keys1.length should be (2)
+ keys1.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz")
+
+ // with multiple partitions
+ val query2 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-11"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0,1)))
+ val keys2 = DataFetcher.getFilteredKeys(query2, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0,1)))
+ keys2.length should be (2)
+ keys2.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz")
+
+ // without partitions
+ val query3 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-11"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, None)
+ val keys3 = DataFetcher.getFilteredKeys(query3, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), None)
+ keys3.length should be (2)
+ keys3.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz")
+
+ // with only end date
+ val query4 = Query(Option("dev-data-store"), Option("raw/"), None, Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0,1)))
+ val keys4 = DataFetcher.getFilteredKeys(query4, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0,1)))
+ keys4.length should be (2)
+ keys4.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz")
+
+ // with only end date and delta
+ val query5 = Query(Option("dev-data-store"), Option("raw/"), None, Option("2020-06-11"), Option(1), None, None, None, None, None, None, None, None, None, Option(List(0)))
+ val keys5 = DataFetcher.getFilteredKeys(query5, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-1-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0)))
+ keys5.length should be (2)
+ keys5.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz")
+ }
+
}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala
index 7e4347f8..d6524acc 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala
@@ -36,32 +36,14 @@ class TestDataFilter extends SparkSpec {
val filteredEvents = DataFilter.filterAndSort[Event](events, filters, None);
filteredEvents.count() should be (20);
filteredEvents.first().eid should be("GE_GENIE_START")
+
+ DataFilter.filter[Event, String](events, "GE_GENIE_START", (event: Event, id: String) => {
+ id.equals(event.eid)
+ }).count() should be (20);
+
+ DataFilter.filter[Event, String](events, "GE_GENIE_START", null).count() should be (7437);
}
- it should "filter the events where game id equals org.ekstep.aser" in {
- val filters = Option(Array[Filter](
- Filter("gameId", "EQ", Option("org.ekstep.aser"))
- ));
- val filteredEvents = DataFilter.filterAndSort(events, filters, None);
- filteredEvents.count() should be (6276);
- filteredEvents.first().gdata.id should be("genie.android")
- }
-
- it should "filter the events where game id not equals org.ekstep.aser" in {
- val filters = Option(Array[Filter](
- Filter("gameId", "NE", Option("org.ekstep.aser"))
- ));
- val filteredEvents = DataFilter.filterAndSort(events, filters, None);
- filteredEvents.count() should be (1161);
- }
-
- it should "filter the events by game version" in {
- val filters = Option(Array[Filter](
- Filter("gameVersion", "EQ", Option("3.0.26"))
- ));
- val filteredEvents = DataFilter.filterAndSort(events, filters, None);
- filteredEvents.count() should be (1413);
- }
it should "filter by custom key using bean property matching " in {
val filters = Option(Array[Filter](
@@ -138,7 +120,7 @@ class TestDataFilter extends SparkSpec {
it should "filter by two criteria" in {
val filters = Option(Array[Filter](
Filter("eventId", "IN", Option(List("OE_ASSESS", "OE_LEVEL_SET"))),
- Filter("gameId", "EQ", Option("org.ekstep.aser"))
+ Filter("gdata.id", "EQ", Option("org.ekstep.aser"))
));
val filteredEvents = DataFilter.filterAndSort(events, filters, None);
filteredEvents.count() should be (1872);
@@ -247,14 +229,6 @@ class TestDataFilter extends SparkSpec {
result1(0).id should be ("Two");
}
- it should "filter by genie tag" in {
- val filteredEvents = DataFilter.filter(events, Filter("genieTag", "IN", Option(List("e4d7a0063b665b7a718e8f7e4014e59e28642f8c"))));
- filteredEvents.count() should be (3);
-
- val filteredEvents2 = DataFilter.filter(events, Filter("genieTag", "IN", Option(List("e4d7a0063b665b7a718e8f7e4014e59e28642f9c"))));
- filteredEvents2.count() should be (2);
- }
-
it should "filter events using range" in {
val date = CommonUtil.dateFormat.parseDateTime("2015-09-23");
@@ -282,7 +256,7 @@ class TestDataFilter extends SparkSpec {
val date = new DateTime()
val filters: Array[Filter] = Array(
Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> date.getMillis))),
- Filter("genieTag", "IN", Option("")))
+ Filter("tags", "IN", Option("")))
DataFilter.matches(inputEvent.first(), filters) should be(false)
DataFilter.matches(inputEvent.first(), Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> date.getMillis)))) should be(true)
DataFilter.matches(inputEvent.first(), Array[Filter]()) should be(true)
@@ -304,6 +278,27 @@ class TestDataFilter extends SparkSpec {
);
val filteredEvents = DataFilter.filter(rddData, filters);
filteredEvents.count() should be (0);
+
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("intCol", "LT", Option(3.asInstanceOf[AnyRef]))) should be (false)
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("longCol", "LT", Option(9L.asInstanceOf[AnyRef]))) should be (false)
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 3.0, sdf.parse("2019-11-11")), Filter("doubleCol", "LT", Option(2.0.asInstanceOf[AnyRef]))) should be (false)
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "LT", Option("2019-11-10".asInstanceOf[AnyRef]))) should be (false)
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "RANGE", Option(Map("start" -> "2019-11-10", "end" -> "2019-11-14")))) should be (true)
+ DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "RANGE", Option(Map("start" -> "2019-11-07", "end" -> "2019-11-09")))) should be (false)
+ }
+
+ it should "cover all uncovered branches" in {
+ DataFilter.matches[MeasuredEvent](MeasuredEvent(null, 0l, 123l, null, null, null, null, None, None, null, null, null), Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> 124l)))) should be (true)
+
+ case class Event1(val eid: String, val ts: String, val ets: Long, val `@timestamp`: String)
+ DataFilter.matches[Event1](Event1(null, "", 123l, "2016-01-02T00:59:22.924Z"), Filter("eventts", "EQ", Option(1451696362924l.asInstanceOf[AnyRef]))) should be (true)
+
+ @scala.beans.BeanInfo
+ case class Event2(eid: String, tags: List[String])
+ DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("tags", "IN", Option(List("tag2")))) should be (true)
+ DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("tags", "NIN", Option(List("tag2")))) should be (false)
+ DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("eid", "NE", Option("Test"))) should be (false)
+ DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("eid", "NE", None)) should be (true)
}
}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala
new file mode 100644
index 00000000..b13ddf96
--- /dev/null
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala
@@ -0,0 +1,53 @@
+package org.ekstep.analytics.framework
+
+import java.text.SimpleDateFormat
+
+import org.scalatest._
+import org.apache.spark.rdd.RDD
+import org.ekstep.analytics.framework.util.CommonUtil
+import org.apache.spark.SparkContext
+import org.json4s.DefaultFormats
+import org.json4s.jackson.JsonMethods
+import com.fasterxml.jackson.core.JsonParseException
+import org.ekstep.analytics.framework.exception.DataFilterException
+import org.apache.spark.SparkException
+import org.ekstep.analytics.framework.util.JSONUtils
+
+import scala.collection.mutable.Buffer
+import java.util.Date
+
+import org.joda.time.DateTime
+
+
+/**
+ * @author Santhosh
+ */
+class TestFrameworkContext extends BaseSpec with BeforeAndAfterAll {
+
+ "FrameworkContext" should "test all methods" in {
+
+ val fc = new FrameworkContext();
+
+ noException should be thrownBy {
+ fc.shutdownStorageService();
+ }
+
+ fc.initialize(Option(Array(("azure", "local", "local"))));
+ fc.getStorageService("azure", "local", "local") should not be (null)
+
+ fc.storageContainers.clear();
+ fc.getStorageService("azure") should not be (null)
+
+ fc.setDruidClient(null, null);
+ noException should be thrownBy {
+ fc.shutdownDruidClient();
+ }
+
+ fc.getDruidClient() should not be (null);
+ fc.setDruidClient(fc.getDruidClient(), fc.getDruidRollUpClient())
+
+ fc.getAkkaHttpUtil() should not be (null)
+ fc.closeContext();
+ }
+
+}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala
index 67de3b27..2536671e 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala
@@ -9,6 +9,9 @@ import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException
import org.scalamock.scalatest.MockFactory
import org.scalatest.Matchers
import org.sunbird.cloud.storage.BaseStorageService
+import org.ekstep.analytics.framework.dispatcher.S3Dispatcher
+import org.apache.hadoop.fs.azure.AzureException
+import org.ekstep.analytics.framework.dispatcher.ConsoleDispatcher
/**
* @author Santhosh
@@ -30,38 +33,6 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr
OutputDispatcher.dispatch(Dispatcher("console", Map()), sc.parallelize(events.take(1)));
}
- val eventsInArray = events.map { x => JSONUtils.serialize(x) }.collect
- noException should be thrownBy {
- OutputDispatcher.dispatch(Dispatcher("console", Map()), eventsInArray);
- }
- }
-
- it should "dispatch output to s3" in {
-
- implicit val mockFc = mock[FrameworkContext];
- val mockStorageService = mock[BaseStorageService]
- (mockFc.getStorageService(_: String): BaseStorageService).expects("aws").returns(mockStorageService).anyNumberOfTimes();
- (mockStorageService.upload _).expects("dev-data-store", *, *, Option(false), None, None, None).returns(null).anyNumberOfTimes();
- (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes()
- val output1 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log", "zip" -> true.asInstanceOf[AnyRef]));
- val output2 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log.gz"));
- val output3 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log2.json"));
- noException should be thrownBy {
- OutputDispatcher.dispatch(output1, events);
- OutputDispatcher.dispatch(output2, events);
- OutputDispatcher.dispatch(output3, events);
- }
-
- val output4 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log", "zip" -> true.asInstanceOf[AnyRef]));
- val output5 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log.gz"));
- val output6 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log2.json"));
- val eventRDDString = events.map(f => JSONUtils.serialize(f)).collect();
- //noException should be thrownBy {
- OutputDispatcher.dispatch(output4, eventRDDString);
- OutputDispatcher.dispatch(output5, eventRDDString);
- OutputDispatcher.dispatch(output6, eventRDDString);
- //}
-
}
it should "throw dispatcher exceptions" in {
@@ -103,20 +74,32 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr
// Invoke S3 dispatcher without required fields ('bucket','key')
a[DispatcherException] should be thrownBy {
- OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("zip" -> true.asInstanceOf[AnyRef])), events);
- OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("bucket" -> Option("test"))), events);
- OutputDispatcher.dispatch(Dispatcher("s3File", Map[String, AnyRef]("zip" -> true.asInstanceOf[AnyRef])), events);
- OutputDispatcher.dispatch(Dispatcher("s3File", Map[String, AnyRef]("bucket" -> Option("test"))), events);
+ OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("key" -> "testKey")), events);
}
-
- // Invoke dispatch with null dispatcher
+
a[DispatcherException] should be thrownBy {
- OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], events);
+ OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("bucket" -> "testBucket")), events);
+ }
+
+ a[DispatcherException] should be thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("s3", null, null), events);
+ }
+
+ a[DispatcherException] should be thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("file", "test", null), events);
+ }
+
+ a[DispatcherException] should be thrownBy {
+ OutputDispatcher.dispatch(null.asInstanceOf[StorageConfig], events);
+ }
+
+ a[DispatcherException] should be thrownBy {
+ ConsoleDispatcher.dispatch(events.map(f => JSONUtils.serialize(f)), StorageConfig("file", "test", null));
}
- val eventsInArray = events.map { x => JSONUtils.serialize(x) }.collect
+ // Invoke dispatch with null dispatcher
a[DispatcherException] should be thrownBy {
- OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], eventsInArray);
+ OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], events);
}
// Invoke dispatch with None dispatchers
@@ -136,7 +119,6 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr
OutputDispatcher.dispatch(Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))), noEvents);
}
- OutputDispatcher.dispatch(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])), Array[String]());
}
it should "execute test cases related to script dispatcher" in {
@@ -155,52 +137,57 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr
val f = new File("src/test/resources/test_output.log");
f.exists() should be(true)
CommonUtil.deleteFile("src/test/resources/test_output.log");
+
+ OutputDispatcher.dispatch(StorageConfig("local", null, "src/test/resources/test_output.log"), events);
+ val f2 = new File("src/test/resources/test_output.log");
+ f2.exists() should be(true)
+ CommonUtil.deleteFile("src/test/resources/test_output.log");
}
+
+ it should "give DispatcherException if azure config is missing " in {
- it should "dispatch output to azure" in {
-
- implicit val mockFc = mock[FrameworkContext];
- val mockStorageService = mock[BaseStorageService]
- (mockFc.getStorageService(_: String): BaseStorageService).expects("azure").returns(mockStorageService).anyNumberOfTimes();
- (mockStorageService.upload _).expects("dev-data-store", *, *, Option(false), None, None, None).returns(null).anyNumberOfTimes();
- (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes()
- val date = System.currentTimeMillis()
- val output1 = Dispatcher("azure", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-dispatcher1-$date.json", "zip" -> true.asInstanceOf[AnyRef]));
- val output2 = Dispatcher("azure", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-dispatcher2-$date.json", "filePath" -> "src/test/resources/sample_telemetry.log"));
- val strData = events.map(f => JSONUtils.serialize(f))
-
- noException should be thrownBy {
- OutputDispatcher.dispatch(output2, strData.collect());
- }
+ implicit val fc = new FrameworkContext();
+ val eventArr = events.map(f => JSONUtils.serialize(f)).cache();
+
+ the[DispatcherException] thrownBy {
+ AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> "output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), eventArr);
+ } should have message "'bucket' & 'key' parameters are required to send output to azure"
+
+ the[DispatcherException] thrownBy {
+ AzureDispatcher.dispatch(Map[String, AnyRef]("bucket" -> "test-bucket", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), eventArr);
+ } should have message "'bucket' & 'key' parameters are required to send output to azure"
+
+ the[DispatcherException] thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("azure", "test-bucket", null), eventArr);
+ } should have message "'bucket' & 'key' parameters are required to send output to azure"
+
+ the[DispatcherException] thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("azure", null, "output/test-directory/"), eventArr);
+ } should have message "'bucket' & 'key' parameters are required to send output to azure"
}
- it should "dispatch directory to azure" in {
- implicit val mockFc = mock[FrameworkContext];
- val mockStorageService = mock[BaseStorageService]
- (mockFc.getStorageService(_: String): BaseStorageService).expects("azure").returns(mockStorageService).anyNumberOfTimes();
- (mockStorageService.upload _).expects("dev-data-store", *, *, Option(true), *, Option(3), *).returns("").anyNumberOfTimes();
- (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes()
- //noException should be thrownBy {
- AzureDispatcher.dispatchDirectory(Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"));
- //}
- }
-
- it should "give DispatcherException if azure config is missing " in {
+ it should "dispatch output to S3/Azure" in {
implicit val fc = new FrameworkContext();
- the[DispatcherException] thrownBy {
- AzureDispatcher.dispatchDirectory(Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"));
- } should have message "'local file path', 'bucket' & 'key' parameters are required to upload directory to azure"
- the[DispatcherException] thrownBy {
- AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), events.map(f => JSONUtils.serialize(f)));
- } should have message "'bucket' & 'key' parameters are required to send output to azure"
+ a[AzureException] should be thrownBy {
+ AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> "test_key", "bucket" -> "test_bucket"), events.map(f => JSONUtils.serialize(f)));
+ }
+
+ a[AzureException] should be thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("azure", "test_bucket", "test_key", Option("azure_storage_key")), events.map(f => JSONUtils.serialize(f)));
+ }
+
+ a[AzureException] should be thrownBy {
+ OutputDispatcher.dispatch(StorageConfig("azure", "test_bucket", "test_key"), events.map(f => JSONUtils.serialize(f)));
+ }
+
+ a[IllegalArgumentException] should be thrownBy {
+ S3Dispatcher.dispatch(Map[String, AnyRef]("key" -> "test_key", "bucket" -> "test_bucket"), events.map(f => JSONUtils.serialize(f)));
+ }
- the[DispatcherException] thrownBy {
- AzureDispatcher.dispatch(events.map(f => JSONUtils.serialize(f)).collect(), Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"));
- } should have message "'bucket' & 'key' parameters are required to send output to azure"
}
it should "dispatch output to elastic-search" in {
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala
index 76b81856..0bcac88d 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala
@@ -23,6 +23,10 @@ class TestSlackDispatcher extends SparkSpec {
the[DispatcherException] thrownBy {
SlackDispatcher.dispatch(Map("channel" -> "testing"), sc.parallelize(List("test")));
} should have message "'channel' & 'userName' parameters are required to send output to slack"
+
+ the[DispatcherException] thrownBy {
+ SlackDispatcher.dispatch(Map("userName" -> "testing"), sc.parallelize(List("test")));
+ } should have message "'channel' & 'userName' parameters are required to send output to slack"
}
}
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala
index 3ab30360..7ecb557b 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala
@@ -7,11 +7,11 @@ class TestDispatcherFactory extends BaseSpec {
it should "return a Model class for a model code" in {
- val dispatcherList = List(Dispatcher("s3file", Map()), Dispatcher("s3", Map()), Dispatcher("kafka", Map()), Dispatcher("script", Map()),
+ val dispatcherList = List(Dispatcher("s3", Map()), Dispatcher("kafka", Map()), Dispatcher("script", Map()),
Dispatcher("console", Map()), Dispatcher("file", Map()), Dispatcher("azure", Map()), Dispatcher("slack", Map()), Dispatcher("elasticsearch", Map()))
val dispatchers = dispatcherList.map { f => DispatcherFactory.getDispatcher(f) }
- dispatchers(1) should be(S3Dispatcher)
+ dispatchers(0) should be(S3Dispatcher)
}
}
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala
index 59eeedde..58806243 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala
@@ -2,6 +2,10 @@ package org.ekstep.analytics.framework.fetcher
import java.time.{ZoneOffset, ZonedDateTime}
+import akka.actor.ActorSystem
+import akka.http.scaladsl.model._
+import akka.stream.scaladsl.Source
+import akka.util.ByteString
import cats.syntax.either._
import ing.wbaa.druid._
import ing.wbaa.druid.client.DruidClient
@@ -9,13 +13,37 @@ import ing.wbaa.druid.definitions.{AggregationType, PostAggregationType}
import io.circe._
import io.circe.parser._
import org.ekstep.analytics.framework._
+import org.ekstep.analytics.framework.util.{CommonUtil, EmbeddedPostgresqlService, HTTPClient, JSONUtils}
import org.scalamock.scalatest.MockFactory
import org.scalatest.Matchers
+import org.joda.time.DateTimeUtils
+import org.sunbird.cloud.storage.conf.AppConf
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
+
+
class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
+ override def beforeAll () {
+ super.beforeAll()
+ EmbeddedPostgresqlService.start()
+ EmbeddedPostgresqlService.createNominationTable()
+ }
+
+ it should "check for getDimensionByType methods" in {
+ val defaultExpr = DruidDataFetcher.getDimensionByType(None, "field", Option("field1"))
+ defaultExpr.toString should be ("Dim(field,Some(field1),None,None)")
+
+ val javascriptExtractionExpr = DruidDataFetcher.getDimensionByType(Option("extraction"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("javascript", "function(x) { return x + 10; }"))))
+ javascriptExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(JavascriptExtractionFn(function(x) { return x + 10; },Some(false))))")
+
+ val lookupExtractionExpr = DruidDataFetcher.getDimensionByType(Option("extraction"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("registeredlookup", "channel"))))
+ lookupExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(RegisteredLookupExtractionFn(channel,Some(false),None)))")
+
+ val cascadeExtractionExpr = DruidDataFetcher.getDimensionByType(Option("cascade"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("registeredlookup", "channel"),ExtractFn("javascript", "function(x) { return x + 10; }"))))
+ cascadeExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(CascadeExtractionFn(List(RegisteredLookupExtractionFn(channel,Some(false),None), JavascriptExtractionFn(function(x) { return x + 10; },Some(false))))))")
+ }
it should "check for getAggregationTypes methods" in {
@@ -69,6 +97,24 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
val javascriptExprWithoutName = DruidDataFetcher.getAggregationByType(AggregationType.Javascript, None, "field",
Option("function(current, edata_size) { return current + (edata_size == 0 ? 1 : 0); }"),
Option("function(partialA, partialB) { return partialA + partialB; }"), Option("function () { return 0; }"))
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None)
+ }
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum"))
+ }
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum"), Option("edata_size"))
+ }
+
+ val filteredExp = DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum"), Option("edata_size"), Option(0.asInstanceOf[AnyRef]))
+ filteredExp.toString should be("SelectorFilteredAgg(edata_size,Some(0),LongSumAggregation(Last,field),None)")
+
+ DruidDataFetcher.getAggregation(Option(List(Aggregation(Option("count"), "test", "field")))).head.getName should be ("count");
+
}
it should "check for getFilterTypes methods" in {
@@ -97,6 +143,41 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
val greaterThanExpr = DruidDataFetcher.getFilterByType("greaterthan", "field", List(0.asInstanceOf[AnyRef]))
val lessThanExpr = DruidDataFetcher.getFilterByType("lessthan", "field", List(1000.asInstanceOf[AnyRef]))
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getFilterByType("test", "field", List(1000.asInstanceOf[AnyRef]))
+ }
+
+ DruidDataFetcher.getFilter(None) should be (None)
+
+ DruidDataFetcher.getFilter(Option(List(DruidFilter("in", "eid", None, None)))).get.asFilter.toString() should be ("AndFilter(List(InFilter(eid,List(),None)))")
+ DruidDataFetcher.getFilter(Option(List(DruidFilter("in", "eid", Option("START"), None)))).get.asFilter.toString() should be ("AndFilter(List(InFilter(eid,List(START),None)))")
+ }
+
+ it should "check for getGroupByHaving methods" in {
+
+ var filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])));
+ filteringExpr.get.asFilter.toString() should be ("BoundFilter(doubleSum,None,Some(20.0),None,Some(true),Some(Numeric),None)")
+
+ filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("equalTo", "user_id", "user1")));
+ filteringExpr.get.asFilter.toString() should be ("SelectFilter(user_id,Some(user1),None)")
+
+ filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("not", "user_id", "user1")));
+ filteringExpr.get.asFilter.toString() should be ("NotFilter(SelectFilter(user_id,Some(user1),None))")
+
+ filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("greaterThan", "doubleSum", 20.asInstanceOf[AnyRef])));
+ filteringExpr.get.asFilter.toString() should be ("BoundFilter(doubleSum,Some(20.0),None,Some(true),None,Some(Numeric),None)")
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("and", "doubleSum", 20.asInstanceOf[AnyRef])));
+ }
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("in", "doubleSum", 20.asInstanceOf[AnyRef])));
+ }
+
+ DruidDataFetcher.getGroupByHaving(None) should be (None);
+
}
it should "check for getPostAggregation methods" in {
@@ -113,15 +194,62 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
val divisionExpr = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Division", PostAggregationFields("field", ""), "/")
divisionExpr.getName.toString should be ("Division")
- val javaScriptExpr = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "Percentage", PostAggregationFields("fieldA", "fieldB"), "function(a, b) { return (a / b) * 100; }")
+ val javaScriptExpr1 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "Percentage", PostAggregationFields("fieldA", "fieldB"), "function(a, b) { return (a / b) * 100; }")
+ javaScriptExpr1.toString should be ("JavascriptPostAgg(List(fieldA, fieldB),function(a, b) { return (a / b) * 100; },Some(Percentage))")
+
+ val javaScriptExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "MultiplyBy100", PostAggregationFields("fieldA", ""), "function(a) { return a * 100; }")
+ javaScriptExpr2.toString should be ("JavascriptPostAgg(List(fieldA),function(a) { return a * 100; },Some(MultiplyBy100))")
+
+ val additionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Addition", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "+")
+ additionExpr2.getName.toString should be ("Addition")
+
+ val subtractionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Subtraction", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "-")
+ subtractionExpr2.getName.toString should be ("Subtraction")
+
+ val multiplicationExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Product", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "*")
+ multiplicationExpr2.getName.toString should be ("Product")
+
+ val divisionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/")
+ divisionExpr2.getName.toString should be ("Division")
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getPostAggregation(Option(List(PostAggregation("longLeast", "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/"))))
+ }
+
+ a[Exception] should be thrownBy {
+ DruidDataFetcher.getPostAggregation(Option(List(PostAggregation("test", "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/"))))
+ }
+
+ DruidDataFetcher.getPostAggregation(None) should be (None);
+
}
-
+
+ it should "test the getDruidQuery method" in {
+ var query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), None, None, None)
+ var druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),None,List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,None,None,List(),Map())");
+
+ query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), None, None, None)
+ druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,None,List(),Map())");
+
+ query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, None, None, None)
+ druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),None,Day,false,List(),Map())");
+
+ DateTimeUtils.setCurrentMillisFixed(1577836800000L); // Setting Jan 1 2020 as current time
+ query = DruidQueryModel("topN", "telemetry-events", "Last7Days", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), None, None, None, intervalSlider = 2)
+ druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-12-23T05:30:00+00:00/2019-12-30T05:30:00+00:00),Day,None,List(),Map())");
+ DateTimeUtils.setCurrentMillisSystem();
+ }
+
it should "fetch the data from druid using groupBy query type" in {
- val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))))
+ val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
val druidQuery = DruidDataFetcher.getDruidQuery(query)
- druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,None,None,List(),Map())")
-
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())")
+
val json: String = """
{
"total_scans" : 9007,
@@ -129,54 +257,86 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
}
"""
val doc: Json = parse(json).getOrElse(Json.Null);
- val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc));
- val druidResponse = DruidResponse.apply(results, QueryType.GroupBy)
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
implicit val mockFc = mock[FrameworkContext];
implicit val druidConfig = mock[DruidConfig];
val mockDruidClient = mock[DruidClient]
- (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
- (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient);
+ (mockDruidClient.doQuery[DruidResponse](_:DruidNativeQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
- val druidResult = DruidDataFetcher.getDruidData(query)
+ val druidResult = DruidDataFetcher.getDruidData(query).collect
druidResult.size should be (1)
druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""")
}
-
+
it should "fetch the data from druid using timeseries query type" in {
- val query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))))
+ val query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
val druidQuery = DruidDataFetcher.getDruidQuery(query);
- druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),Day,false,List(),Map())");
-
- val json: String = """
+ druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),Day,false,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())");
+
+ var json: String = """
{
"total_scans" : 9007,
"producer_id" : "dev.sunbird.learning.platform"
}
"""
- val doc: Json = parse(json).getOrElse(Json.Null);
- val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc));
- val druidResponse = DruidResponse.apply(results, QueryType.Timeseries)
+ var doc: Json = parse(json).getOrElse(Json.Null);
+ var results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ var druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries)
implicit val mockFc = mock[FrameworkContext];
implicit val druidConfig = mock[DruidConfig];
val mockDruidClient = mock[DruidClient]
- (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
- (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient);
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes()
+
+ var druidResult = DruidDataFetcher.getDruidData(query).collect
- val druidResult = DruidDataFetcher.getDruidData(query)
-
druidResult.size should be (1)
druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""")
+
+ json = """
+ {
+ "total_scans" : null,
+ "producer_id" : "dev.sunbird.learning.platform"
+ }
+ """
+ doc = parse(json).getOrElse(Json.Null);
+ results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries)
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ // (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient)
+
+ druidResult = DruidDataFetcher.getDruidData(query).collect()
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"total_scans":"unknown","producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""")
+
+ json = """
+ {
+ "total_scans" : {},
+ "producer_id" : "dev.sunbird.learning.platform"
+ }
+ """
+ doc = parse(json).getOrElse(Json.Null);
+ results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries)
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ // (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient)
+
+ druidResult = DruidDataFetcher.getDruidData(query).collect()
+
+ druidResult.size should be (1)
}
it should "fetch the data from druid using topN query type" in {
- val query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))))
+ val query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
val druidQuery = DruidDataFetcher.getDruidQuery(query);
- druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(),Map())")
+ druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())")
val json: String = """
[
@@ -187,23 +347,389 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory {
{
"count" : 1,
"producer_id" : "local.sunbird.desktop"
+ },
+ {
+ "count" : null,
+ "producer_id" : "local.sunbird.app"
+ },
+ {
+ "count" : {},
+ "producer_id" : "local.sunbird.app"
}
]
"""
val doc: Json = parse(json).getOrElse(Json.Null);
- val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc));
- val druidResponse = DruidResponse.apply(results, QueryType.TopN)
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.TopN)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient)
+
+ val druidResult = DruidDataFetcher.getDruidData(query).collect()
+
+ druidResult.size should be (4)
+ druidResult(0) should be ("""{"date":"2019-11-28","count":5,"producer_id":"dev.sunbird.portal"}""")
+ druidResult(1) should be ("""{"date":"2019-11-28","count":1,"producer_id":"local.sunbird.desktop"}""")
+ druidResult(2) should be ("""{"date":"2019-11-28","count":"unknown","producer_id":"local.sunbird.app"}""")
+
+ val druidResponse2 = DruidResponseTimeseriesImpl.apply(List(), QueryType.TopN)
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse2))
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient)
+ val druidResult2 = DruidDataFetcher.getDruidData(query).collect()
+ druidResult2.size should be (0)
+
+ }
+ it should "fetch the data from druid rollup cluster using groupBy query type" in {
+
+ val query = DruidQueryModel("groupBy", "telemetry-rollup-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())")
+
+ val json: String = """
+ {
+ "total_scans" : 9007,
+ "producer_id" : "dev.sunbird.learning.platform"
+ }
+ """
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient);
+
+ val druidResult = DruidDataFetcher.getDruidData(query).collect()
+
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""")
+ }
+
+ it should "fetch data for groupBy dimensions with extraction fn" in {
+ val qrScans = DruidQueryModel("groupBy", "telemetry-rollup-syncts", "2020-03-01/2020-04-01", Option("all"), Option(List(Aggregation(Option("total_scans"),"longSum", "total_count"))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district"),Option("Extraction"), Option("STRING"), Option(List(ExtractFn("javascript", "function(str){return str == null ? null: str.toLowerCase().trim().split(' ').map(function(t){return t.substring(0,1).toUpperCase()+t.substring(1,t.length)}).join(' ')}")))))), Option(List(DruidFilter("in", "object_type", None, Option(List("qr", "Qr", "DialCode", "dialcode"))), DruidFilter("equals", "eid", Option("SEARCH")), DruidFilter("equals", "derived_loc_state", Option("Andhra Pradesh")), DruidFilter("isnotnull", "derived_loc_district", None))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(qrScans)
+ druidQuery.toString should be ("GroupByQuery(List(LongSumAggregation(total_scans,total_count)),List(2020-03-01/2020-04-01),Some(AndFilter(List(InFilter(object_type,List(qr, Qr, DialCode, dialcode),None), SelectFilter(eid,Some(SEARCH),None), SelectFilter(derived_loc_state,Some(Andhra Pradesh),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), ExtractionDimension(derived_loc_district,Some(district),Some(STRING),JavascriptExtractionFn(function(str){return str == null ? null: str.toLowerCase().trim().split(' ').map(function(t){return t.substring(0,1).toUpperCase()+t.substring(1,t.length)}).join(' ')},Some(false)))),All,None,None,List(),Map())")
+
+
+ val json = """{"total_scans":7257.0,"district":"Anantapur","state":"Andhra Pradesh","date":"2020-03-01"}"""
+
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
implicit val mockFc = mock[FrameworkContext];
implicit val druidConfig = mock[DruidConfig];
val mockDruidClient = mock[DruidClient]
- (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
- (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient);
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ val druidResult = DruidDataFetcher.getDruidData(qrScans).collect()
+
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"total_scans":7257.0,"district":"Anantapur","state":"Andhra Pradesh","date":"2020-03-01"}""")
+ }
+
+ "TesthLL" should "fetch data for groupBy dimension with HLLAggregator" in {
+ val districtMonthly = DruidQueryModel("groupBy", "summary-distinct-counts", "2020-05-12/2020-05-13", Option("all"), Option(List(Aggregation(Option("total_unique_devices"), "HLLSketchMerge", "unique_devices", None, None, None, None, None), Aggregation(None, "HLLSketchMerge", "devices", None, None, None, None, None), Aggregation(Option("Count"), "count", ""))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district")))), Option(List(DruidFilter("in", "dimensions_pdata_id", None, Option(List("prod.diksha.app", "prod.diksha.portal"))), DruidFilter("isnotnull", "derived_loc_district", None))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(districtMonthly)
+ druidQuery.toString should be ("GroupByQuery(List(HLLAggregation(total_unique_devices,unique_devices,12,HLL_4,true), HLLAggregation(hllsketchmerge_devices,devices,12,HLL_4,true), CountAggregation(Count)),List(2020-05-12/2020-05-13),Some(AndFilter(List(InFilter(dimensions_pdata_id,List(prod.diksha.app, prod.diksha.portal),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), DefaultDimension(derived_loc_district,Some(district),None)),All,None,None,List(),Map())")
+
+ val json = """{"state":"Andaman & Nicobar Islands","total_unique_devices":1.0,"Count":9.0,"date":"2020-03-01","district":"Ahmednagar"}"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(districtMonthly).collect()
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"state":"Andaman & Nicobar Islands","total_unique_devices":1.0,"Count":9.0,"date":"2020-03-01","district":"Ahmednagar"}""")
+ }
+
+ "TestFetcher" should "fetch data for TopN dimension with Lookup" in {
+ val query = DruidQueryModel("topN", "telemetry-events", "2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00", Option("all"),
+ Option(List(Aggregation(Option("count"), "count", "count"))),
+ Option(List(DruidDimension("dialcode_channel", Option("dialcode_slug"), Option("extraction"), None,
+ Option(List(ExtractFn("registeredlookup", "channel")))))),
+ Option(List(DruidFilter("equals", "dialcode_channel", Option("012315809814749184151")))), None, None, None,None,None, Option("count"))
+
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString should be("TopNQuery(ExtractionDimension(dialcode_channel,Some(dialcode_slug),None,RegisteredLookupExtractionFn(channel,Some(false),None)),100,count,List(CountAggregation(count)),List(2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00),All,Some(AndFilter(List(SelectFilter(dialcode_channel,Some(012315809814749184151),None)))),List(),Map())")
+
+ val json = """[{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}]"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.TopN)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_: DruidQuery)(_: DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query).collect()
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}""")
+ }
+
+ it should "fetch data for GroupBy dimension with Lookup and replaceMissingValue as Unknown" in {
+ val lookupQuery = DruidQueryModel("groupBy", "telemetry-events", "2020-05-08T00:00:00+00:00/2020-05-15T00:00:00+00:00", Option("all"),
+ Option(List(Aggregation(Option("count"), "count", "count"))),
+ Option(List(DruidDimension("derived_loc_state", Option("state_slug"), Option("extraction"), None,
+ Option(List(ExtractFn("registeredlookup", "lookup_state", None, Option("Unknown"))))), DruidDimension("derived_loc_district", Option("district_slug"), Option("extraction"), None,
+ Option(List(ExtractFn("registeredlookup", "lookup_district", None, Option("Unknown"))))))))
+
+ val query = DruidDataFetcher.getDruidQuery(lookupQuery)
+ query.toString should be("GroupByQuery(List(CountAggregation(count)),List(2020-05-08T00:00:00+00:00/2020-05-15T00:00:00+00:00),None,List(ExtractionDimension(derived_loc_state,Some(state_slug),None,RegisteredLookupExtractionFn(lookup_state,None,Some(Unknown))), ExtractionDimension(derived_loc_district,Some(district_slug),None,RegisteredLookupExtractionFn(lookup_district,None,Some(Unknown)))),All,None,None,List(),Map())")
+
+ val json = """{"district_slug":"Andamans","state_slug":"Andaman & Nicobar Islands","count":138.0,"date":"2020-05-08"}"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_: DruidQuery)(_: DruidConfig)).expects(query, *).returns(Future(druidResponse)).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(lookupQuery).collect()
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"district_slug":"Andamans","state_slug":"Andaman & Nicobar Islands","count":138.0,"date":"2020-03-01"}""")
+ }
+
+ it should "fetch data for filtered aggregation" in {
+ val scansQuery = DruidQueryModel("groupBy", "summary-distinct-counts", "2020-05-12/2020-05-13", Option("all"), Option(List(Aggregation(Option("total_failed_scans"), "filtered", "total_count", None, None, None, None, None, None, Option("longSum"), Option("edata_size"), Option(0.asInstanceOf[AnyRef])))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district")))), Option(List(DruidFilter("in", "dimensions_pdata_id", None, Option(List("prod.diksha.app", "prod.diksha.portal"))), DruidFilter("isnotnull", "derived_loc_district", None))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(scansQuery)
+ druidQuery.toString should be ("GroupByQuery(List(SelectorFilteredAggregation(total_failed_scans,SelectFilter(edata_size,Some(0),None),LongSumAggregation(total_failed_scans,total_count))),List(2020-05-12/2020-05-13),Some(AndFilter(List(InFilter(dimensions_pdata_id,List(prod.diksha.app, prod.diksha.portal),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), DefaultDimension(derived_loc_district,Some(district),None)),All,None,None,List(),Map())")
+
+ val json = """{"state":"Andaman & Nicobar Islands","total_failed_scans":10,"date":"2020-03-01","district":"Ahmednagar"}"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(scansQuery).collect()
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"state":"Andaman & Nicobar Islands","total_failed_scans":10.0,"date":"2020-03-01","district":"Ahmednagar"}""")
+ }
+
+ it should "give result for stream query" in {
+ val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())")
+
+ val json: String = """
+ {
+ "total_scans" : 9007,
+ "producer_id" : "dev.sunbird.learning.platform"
+ }
+ """
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse))).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query,true).collect()
+
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""")
+ }
+
+ it should "give result for stream topn query" in {
+ val query = DruidQueryModel("topN", "telemetry-events", "2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00", Option("all"),
+ Option(List(Aggregation(Option("count"), "count", "count"))),
+ Option(List(DruidDimension("dialcode_channel", Option("dialcode_slug"), Option("extraction"), None,
+ Option(List(ExtractFn("registeredlookup", "channel")))))),
+ Option(List(DruidFilter("equals", "dialcode_channel", Option("012315809814749184151")))), None, None,None, None,None, Option("count"))
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+
+ val json = """[{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}]"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc));
+ val druidResponse = DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse))).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query,true).collect()
+
+ druidResult.size should be (1)
+ druidResult.head should be ("""{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}""")
+ }
+
+ it should "test scan query with stream" in {
+
+ val query = DruidQueryModel("scan", "summary-rollup-syncts", "2020-03-12T00:00:00+00:00/2020-03-13T00:00:00+00:00", Option("all"),
+ None, None, None, None, None,Option(List("derived_loc_state","derived_loc_district")), None, None)
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+
+ val json = """{"__time":1583971200000,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2019-03-12"}"""
+ val json1 = """{"__time":1583971200000,"derived_loc_state":"ka","derived_loc_district":"unknown","date":"2019-03-12"}"""
+ val json2 = """{"__time":1583971200000,"derived_loc_state":"apekx","derived_loc_district":"Vizag","date":"2019-03-12"}"""
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ val doc1: Json = parse(json1).getOrElse(Json.Null)
+ val doc2: Json = parse(json2).getOrElse(Json.Null)
+ val druidResponse = DruidScanResult.apply(doc)
+ val druidResponse1 = DruidScanResult.apply(doc1)
+ val druidResponse2 = DruidScanResult.apply(doc2)
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse,druidResponse1,druidResponse2))).anyNumberOfTimes()
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query,true).collect()
+
+ druidResult.size should be (3)
+ druidResult.head should be ("""{"__time":1.5839712E12,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2020-03-12"}""")
+
+ }
+
+ it should "test scan query without stream" in {
+
+ val query = DruidQueryModel("scan", "summary-events", "2020-03-12T00:00:00+00:00/2020-03-13T00:00:00+00:00", Option("all"),
+ None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, None,None, None, None)
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+ val json = """{"__time":1583971200000,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2019-03-12","created_for": null,"active":true}"""
+ val doc: Json = parse(json).getOrElse(Json.Null)
+ val results = List(DruidScanResult.apply(doc));
+ val scanresults = DruidScanResults.apply("122",List("derived_loc_state","derived_loc_district","active"),results)
+ val druidResponse = DruidScanResponse.apply(List(scanresults))
+ implicit val mockFc = mock[FrameworkContext]
+ implicit val druidConfig = mock[DruidConfig]
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockDruidClient.doQuery[DruidResponse](_:DruidNativeQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query).collect()
+
+ druidResult.size should be (1)
+ druidResult.head should be (
+ """{"created_for":"unknown","derived_loc_state":"unknown","__time":1.5839712E12,"date":"2020-03-12","derived_loc_district":"unknown","active":true}""".stripMargin)
+
+ }
+
+ it should "test query with stream with empty results" in {
+ val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+"))))
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())")
+
+ val json: String = """
+ {
+ }
+ """
+ val doc: Json = parse(json).getOrElse(Json.Null);
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List())).anyNumberOfTimes()
+ (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+
+ val druidResult = DruidDataFetcher.getDruidData(query,true).collect()
+
+ druidResult.size should be (0)
+ }
+
+ it should "test sql query " in {
+
+ val sqlQuery = DruidQueryModel("scan", "summary-rollup-syncts", "2020-08-23T00:00:00+00:00/2020-08-24T00:00:00+00:00", Option("all"),
+ None, None, None, None, None, None, Option(List(DruidSQLDimension("state",Option("LOOKUP(derived_loc_state, 'stateSlugLookup')")),
+ DruidSQLDimension("dimensions_pdata_id",None))),None)
+
+
+ implicit val mockFc = mock[FrameworkContext];
+ implicit val druidConfig = mock[DruidConfig];
+
+
+ val mockAkkaUtil = mock[AkkaHttpClient]
+ val url = String.format("%s://%s:%s%s%s", "http",AppConf.getConfig("druid.rollup.host"),
+ AppConf.getConfig("druid.rollup.port"),AppConf.getConfig("druid.url"),"sql")
+ val request = HttpRequest(method = HttpMethods.POST,
+ uri = url,
+ entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(DruidDataFetcher.getSQLDruidQuery(sqlQuery))))
+ val stripString =
+ """{"dimensions_pdata_id":"", "state":10}
+ {"dimensions_pdata_id":null, "state":5}
+ |{"dimensions_pdata_id":"dev.portal", "state":5}""".stripMargin
+ val mockDruidClient = mock[DruidClient]
+ (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes()
+ (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes();
+ (mockAkkaUtil.sendRequest(_: HttpRequest)(_: ActorSystem))
+ .expects(request,mockDruidClient.actorSystem)
+ .returns(Future.successful(HttpResponse(entity = HttpEntity(ByteString(stripString))))).anyNumberOfTimes();
+ val response = DruidDataFetcher.executeSQLQuery(sqlQuery, mockAkkaUtil)
+ response.count() should be (3)
+ }
+
+ "DruidDataFetcher" should "verify DruidOutput operations" in {
+ val json: String =
+ """
+ {
+ "total_sessions" : 2000,
+ "total_ts" : 5,
+ "district" : "Nellore",
+ "state" : "Andhra Pradesh"
+ }
+ """
+
+ val output = new DruidOutput(JSONUtils.deserialize[Map[String,AnyRef]](json))
+ output.size should be(4)
+ val output2 =output + ("count" -> 1)
+ output2.size should be(5)
+ val output3 = output - ("count")
+ output3.size should be(4)
+ output3.get("total_ts").get should be(5)
+ }
+
+
+ it should "test the latest_index granularity" in {
+ EmbeddedPostgresqlService.execute("INSERT INTO druid_segments (id,datasource,start,\"end\",used) VALUES('segment1','content-model-snapshot','2020-10-27T00:00:00.000Z','2020-10-28T00:00:00.000Z','t')")
+ val query = DruidQueryModel("groupBy", "content-model-snapshot", "LastDay",
+ Option("latest_index"), Option(List(Aggregation(Option("count"), "count", ""))),
+ Option(List(DruidDimension("status", Option("status")))),
+ None,None,None)
+ val druidQuery = DruidDataFetcher.getDruidQuery(query)
- val druidResult = DruidDataFetcher.getDruidData(query)
+ druidQuery.toDebugString.contains("2020-10-27T00:00:00.000Z") should be (true)
+ druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count)),List(2020-10-27T00:00:00.000Z/2020-10-28T00:00:00.000Z),None,List(DefaultDimension(status,Some(status),None)),All,None,None,List(),Map())")
+ val query1 = DruidQueryModel("groupBy", "content-snapshot", "2019-11-01/2019-11-02",
+ Option("latest_index"), Option(List(Aggregation(Option("count"), "count", ""))),
+ Option(List(DruidDimension("status", Option("status")))),
+ None,None,None)
+ val druidQuery1 = DruidDataFetcher.getDruidQuery(query1)
+ druidQuery1.toDebugString.contains("2019-11-01") should be (true)
- druidResult.size should be (2)
- druidResult.head should be ("""{"date":"2019-11-28","count":5,"producer_id":"dev.sunbird.portal"}""")
- druidResult.last should be ("""{"date":"2019-11-28","count":1,"producer_id":"local.sunbird.desktop"}""")
}
}
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala
new file mode 100644
index 00000000..160fce60
--- /dev/null
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala
@@ -0,0 +1,54 @@
+package org.ekstep.analytics.framework.util
+
+import java.sql.{ResultSet, Statement}
+
+import io.zonky.test.db.postgres.embedded.EmbeddedPostgres
+import java.sql.Connection
+
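+// Test helper that runs an embedded PostgreSQL instance (io.zonky.test embedded-postgres) on a
+// fixed port so tests can seed Druid segment metadata without an external database.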
+object EmbeddedPostgresqlService {
+
+ var pg: EmbeddedPostgres = null;
+ var connection: Connection = null;
+ var stmt: Statement = null;
+
+ def start() {
+ println("******** Establishing The Postgress Connection *********")
+ pg = EmbeddedPostgres.builder().setPort(65124).start()
+ connection = pg.getPostgresDatabase().getConnection()
+ stmt = connection.createStatement()
+ println("connection.getClientInfo" + connection.getClientInfo)
+ }
+
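+ // Note: despite its name, this creates the druid_segments table consumed by the latest_index granularity test.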
+ def createNominationTable(): Boolean = {
+ val tableName: String = "druid_segments"
+ val query =
+ s"""
+ |CREATE TABLE IF NOT EXISTS $tableName (
+ | id TEXT PRIMARY KEY,
+ | datasource TEXT,
+ | start TEXT,
+ | \"end\" TEXT,
+ | used TEXT)""".stripMargin
+
+ execute(query)
+ }
+
+ def execute(sqlString: String): Boolean = {
+ stmt.execute(sqlString)
+ }
+
+ def executeQuery(sqlString: String): ResultSet = {
+ stmt.executeQuery(sqlString)
+ }
+
+ def dropTable(tableName: String): Boolean = {
+ stmt.execute(s"DROP TABLE $tableName")
+ }
+
+ def close() {
+ println("******** Closing The Postgress Connection *********")
+ stmt.close()
+ connection.close()
+ pg.close()
+ }
+}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala
index a4404c75..b1541933 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala
@@ -11,271 +11,395 @@ import java.text.SimpleDateFormat
import scala.collection.mutable.ListBuffer
import org.joda.time.format.DateTimeFormat
import org.ekstep.analytics.framework.Period._
+import org.joda.time.DateTimeUtils
+import ing.wbaa.druid.definitions.GranularityType
+import com.google.common.eventbus.Subscribe
+import org.ekstep.analytics.framework.conf.AppConf
class TestCommonUtil extends BaseSpec {
- it should "pass test case of all methods in CommonUtil" in {
- try {
- //datesBetween
- val from = new LocalDate("2016-01-01");
- val to = new LocalDate("2016-01-04");
- CommonUtil.datesBetween(from, to).toArray should be(Array(new LocalDate("2016-01-01"), new LocalDate("2016-01-02"), new LocalDate("2016-01-03"), new LocalDate("2016-01-04")))
-
- //deleteDirectory
- val path = "delete-this";
- val dir = new File(path)
- val dirCreated = dir.mkdir;
- dirCreated should be(true);
- val fp = "delete-this/delete-this.txt";
- val f = new File(fp);
- f.createNewFile();
- CommonUtil.deleteDirectory(path)
- dir.isDirectory() should be(false);
- f.isFile() should be(false);
-
- //deleteFile
- val filePath = "delete-this.txt";
- val noFile = "no-file.txt"
- val file = new File(filePath);
- val created = file.createNewFile();
- created should be(true);
- CommonUtil.deleteFile(filePath)
- CommonUtil.deleteFile(noFile)
- file.isFile() should be(false);
-
- //getAge
- val dateformat = new SimpleDateFormat("dd/MM/yyyy");
- val dob = dateformat.parse("04/07/1990");
- CommonUtil.getAge(dob) should be > (25)
-
- //getDatesBetween
- CommonUtil.getDatesBetween("2016-01-01", Option("2016-01-04")) should be(Array("2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"))
- CommonUtil.getDatesBetween("2016-01-01", None) should not be null;
-
- //getEvent
- val line = "{\"eid\":\"OE_START\",\"ts\":\"2016-01-01T12:13:20+05:30\",\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
- val event = JSONUtils.deserialize[Event](line);
- val line2 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
- val event2 = JSONUtils.deserialize[Event](line2);
- val line3 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22+05:30\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
- val event3 = JSONUtils.deserialize[Event](line3);
- val line4 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22P:ST\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
- val event4 = JSONUtils.deserialize[Event](line4);
- val line5 = "{\"eid\":\"OE_START\",\"ets\":1451630600000,\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
- val event5 = JSONUtils.deserialize[Event](line5);
-
- //getEventDate yyyy-MM-dd'T'HH:mm:ssZZ
- val evDate = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZZ").parseLocalDate("2016-01-01T12:13:20+05:30").toDate;
- CommonUtil.getEventDate(event) should be(evDate)
-
- //getEventTs
- CommonUtil.getEventTS(event) should be(1451630600000L)
- CommonUtil.getEventTS(event5) should be(1451630600000L)
- CommonUtil.getEventSyncTS(event) should be(1451696362924L)
- CommonUtil.getEventSyncTS(event2) should be(0L)
- CommonUtil.getEventSyncTS(event3) should be(1451676562000L)
- CommonUtil.getEventSyncTS(event4) should be(1451696362000L)
-
- CommonUtil.getEventTS(event2) should be(0)
-
- CommonUtil.getEventDate(event2) should be(null)
-
- //getGameId
- CommonUtil.getGameId(event) should be("org.ekstep.aser.lite")
- CommonUtil.getGameId(event2) should be(null)
-
- //getGameVersion
- CommonUtil.getGameVersion(event) should be("5.7")
- CommonUtil.getGameVersion(event2) should be(null)
-
- //getHourOfDay
- CommonUtil.getHourOfDay(1447154514000L, 1447158114000L) should be(ListBuffer(11, 12))
- CommonUtil.getHourOfDay(1447154514000L, 1447000L) should be(ListBuffer(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0))
-
- //getParallelization
- val config = new JobConfig(null, None, None, null, None, None, Option(10), Option("testApp"), Option(false));
- CommonUtil.getParallelization(config) should be(10)
-
- val config2 = new JobConfig(null, None, None, null, None, None, None, Option("testApp"), Option(false));
- CommonUtil.getParallelization(config) should be(10)
-
- //getParallelization
- val con = Option(Map("search" -> null, "filters" -> null, "sort" -> null, "model" -> null, "modelParams" -> null, "output" -> null, "parallelization" -> "10", "appName" -> "testApp", "deviceMapping" -> null))
- CommonUtil.getParallelization(con) should be(10)
-
- //getStartDate
- CommonUtil.getStartDate(Option("2016-01-08"), 7) should be(Option("2016-01-01"))
- CommonUtil.getStartDate(None, 0) should be(Option(LocalDate.fromDateFields(new Date).toString()))
-
- //getTimeDiff
- CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d))
- CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d))
-
- CommonUtil.getTimeDiff(event, event) should be(Option(0d))
- CommonUtil.getTimeDiff(event, event2) should be(Option(0d))
-
- //getTimeSpent
- CommonUtil.getTimeSpent("10") should be(Option(10d))
- CommonUtil.getTimeSpent(10d.asInstanceOf[AnyRef]) should be(Option(10d))
- CommonUtil.getTimeSpent(10.asInstanceOf[AnyRef]) should be(Option(10d))
- CommonUtil.getTimeSpent(null) should be(Option(0d))
- CommonUtil.getTimeSpent(true.asInstanceOf[AnyRef]) should be(Option(0d))
-
- CommonUtil.getTimestamp("2016-01-02T00:59:22+P:ST") should be(1451696362000L);
-
- CommonUtil.roundDouble(12.7345, 2) should be(12.73);
-
- //gzip
- val testPath = "src/test/resources/sample_telemetry.log";
- CommonUtil.gzip(testPath)
- new File("src/test/resources/sample_telemetry.log.gz").isFile() should be(true)
- CommonUtil.deleteFile("src/test/resources/sample_telemetry.log.gz");
-
- a[Exception] should be thrownBy {
- CommonUtil.gzip("src/test/resources/sample_telemetry.txt")
- }
-
- CommonUtil.getParallelization(None) should be(10);
-
- CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L)) should be("1D99B2F1C6637AE21081CD981AFFB56F");
- CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "org.ekstep.aser.lite") should be("6D5DCB288B1A9BC3036D04C37FF08EDF");
-
- CommonUtil.getMessageId("ME_TEST", "123", "MONTH", 1451650400000L, None, None) should be("D0BF57F856E3B7FAD5E47CCD4B31DE57");
-
- val res = CommonUtil.time({
-
- CommonUtil.getWeeksBetween(1451650400000L, 1454650400000L) should be(5)
- CommonUtil.getPeriod(1451650400000L, DAY) should be(20160101)
- CommonUtil.getPeriod(1451650400000L, WEEK) should be(2015753)
- CommonUtil.getPeriod(1452250748000L, WEEK) should be(2016701)
- CommonUtil.getPeriod(1451650400000L, MONTH) should be(201601)
- CommonUtil.getPeriod(1451650400000L, CUMULATIVE) should be(0)
- CommonUtil.getPeriod(1451650400000L, LAST7) should be(7)
- CommonUtil.getPeriod(1451650400000L, LAST30) should be(30)
- CommonUtil.getPeriod(1451650400000L, LAST90) should be(90)
- CommonUtil.getPeriod(new DateTime("2016-01-01"), DAY) should be(20160101)
-
- })
- res._1 should be > (0L)
-
- //getTags
- val metaData1 = Map("tags" -> List("test", "QA"), "activation_keys" -> "ptm007")
- val tags1 = CommonUtil.getTags(metaData1).get
- tags1.length should be(2)
-
- val metaData2 = Map("activation_keys" -> "ptm007", "tags" -> null)
- val tags2 = CommonUtil.getTags(metaData2).get
- tags2.length should be(0)
-
- val metaData3 = Map("activation_keys" -> "ptm007")
- val tags3 = CommonUtil.getTags(metaData3).get
- tags3.length should be(0)
-
- CommonUtil.daysBetween(new DateTime(1451650400000L).toLocalDate(), new DateTime(1454650400000L).toLocalDate()) should be(35);
- } catch {
- case ex: Exception => ex.printStackTrace();
- }
-
- CommonUtil.getPathFromURL("https://ekstep-public.s3-ap-southeast-1.amazonaws.com/ecar_files/domain_38527_1460631037666.ecar") should be("/ecar_files/domain_38527_1460631037666.ecar")
-
- // getPeriods
- val daysArray = CommonUtil.getPeriods(DAY, 5)
- daysArray.length should be(5)
-
- val weeksArray = CommonUtil.getPeriods(WEEK, 5)
- weeksArray.length should be(5)
-
- val monthsArray = CommonUtil.getPeriods(MONTH, 5)
- monthsArray.length should be(5)
-
- val cumArray = CommonUtil.getPeriods(CUMULATIVE, 5)
- cumArray.length should be(1)
-
- CommonUtil.getPeriods("DAY", 5)
- CommonUtil.getPeriods("WEEK", 5)
- CommonUtil.getPeriods("MONTH", 5)
- CommonUtil.getPeriods("CUMULATIVE", 5)
-
- //getValidTags
- val dEvent1 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3
q40\",\"mc\":[]},{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{\"app\":[]}}"
- val derivedEvent1 = JSONUtils.deserialize[DerivedEvent](dEvent1);
- CommonUtil.getValidTags(derivedEvent1, Array("test"))
-
- val dEvent2 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3
q40\",\"mc\":[]},{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{}}"
- val derivedEvent2 = JSONUtils.deserialize[DerivedEvent](dEvent2);
- CommonUtil.getValidTags(derivedEvent2, Array("test"))
-
- val dEvent3 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3
q40\",\"mc\":[]},{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{\"app\":[\"test\", \"QA\"]}}"
- val derivedEvent3 = JSONUtils.deserialize[DerivedEvent](dEvent3);
- val out = CommonUtil.getValidTags(derivedEvent3, Array("test"))
- out.length should be(1)
-
- //zip
- CommonUtil.zip("src/test/resources/test.zip", List("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry_2.log"))
- new File("src/test/resources/test.zip").isFile() should be(true)
- CommonUtil.deleteFile("src/test/resources/test.zip");
- //zip folder
- //CommonUtil.zipFolder("src/test/resources/zipFolderTest.zip", "src/test/resources/1234/OE_INTERACT")
- //new File("src/test/resources/zipFolderTest.zip").isFile() should be(true)
- //CommonUtil.deleteFile("src/test/resources/zipFolderTest.zip");
-
- //ccToMap
- val x = CommonUtil.caseClassToMap(DerivedEvent)
-
- //zip dir
- CommonUtil.zipDir("src/test/resources/test.zip", "src/test/resources/1234")
- new File("src/test/resources/test.zip").isFile() should be(true)
- CommonUtil.deleteFile("src/test/resources/test.zip");
-
- //getChanneId
- val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
- val channelId = CommonUtil.getChannelId(JSONUtils.deserialize[Event](event))
- channelId should be("in.ekstep")
-
- val drivedEvent = "{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}"
- val channelId1 = CommonUtil.getChannelId(JSONUtils.deserialize[DerivedEvent](drivedEvent))
- channelId1 should be("in.ekstep")
-
- val profileEvent = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}"
- val channelId2 = CommonUtil.getChannelId(JSONUtils.deserialize[ProfileEvent](profileEvent))
- channelId2 should be("in.ekstep")
-
- // getAppDetails
- val event1 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
- val appId = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event))
- appId.id should be("genie")
-
- val drivedEvent1 = "{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}"
- val appId1 = CommonUtil.getAppDetails(JSONUtils.deserialize[DerivedEvent](drivedEvent))
- appId1.id should be("genie")
-
- val profileEvent1 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}"
- val appId2 = CommonUtil.getAppDetails(JSONUtils.deserialize[ProfileEvent](profileEvent1))
- appId2.id should be("genie")
-
- //getEndTimestampOfDay
- val time = CommonUtil.getEndTimestampOfDay("2016-01-02")
- time.toString() should be ("1451759399000")
-
- // dayPeriodToLong
- val dayPeriodToLong = CommonUtil.dayPeriodToLong(20170713)
- dayPeriodToLong.toString should be("1499904000000")
-
- // getWeeksBetween
- val getWeeksBetween = CommonUtil.getWeeksBetween(1499904L, 1451759399L)
- getWeeksBetween should be(2)
-
- // getMetricEvent
- val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> "test", "metrics" -> List(V3MetricEdata("count", "100".asInstanceOf[AnyRef]))), "pipeline-monitoring", "dataproduct-metric")
- metricEvent.context.pdata.get.id should be("pipeline-monitoring")
- metricEvent.context.pdata.get.pid.get should be("dataproduct-metric")
-
- val epochToTimestamp = CommonUtil.getTimestampFromEpoch(1537550355883L)
- epochToTimestamp.toString should be("2018-09-21 17:19:15.883")
-
- val connectionProperties = CommonUtil.getPostgresConnectionProps()
- connectionProperties.getProperty("user") should be("postgres")
- connectionProperties.getProperty("password") should be("postgres")
- connectionProperties.getProperty("driver") should be("org.postgresql.Driver")
+ private case class TestCaseClass(mid: String, date: DateTime);
+
+ class TestEventListener() {
+ var event: String = _;
+ @Subscribe def onMessage(event: String) {
+ this.event = event;
+ }
+ }
+
+ it should "pass test case of all methods in CommonUtil" in {
+ try {
+ //datesBetween
+ val from = new LocalDate("2016-01-01");
+ val to = new LocalDate("2016-01-04");
+ CommonUtil.datesBetween(from, to).toArray should be(Array(new LocalDate("2016-01-01"), new LocalDate("2016-01-02"), new LocalDate("2016-01-03"), new LocalDate("2016-01-04")))
+
+ //deleteDirectory
+ val path = "delete-this";
+ val dir = new File(path)
+ val dirCreated = dir.mkdir;
+ dirCreated should be(true);
+ val fp = "delete-this/delete-this.txt";
+ val f = new File(fp);
+ f.createNewFile();
+ CommonUtil.deleteDirectory(path)
+ dir.isDirectory() should be(false);
+ f.isFile() should be(false);
+
+ val sc = CommonUtil.getSparkContext(1, "test", None, None);
+ (new HadoopFileUtil()).delete("delete-this/delete-this.txt", sc.hadoopConfiguration);
+ sc.stop();
+
+ //deleteFile
+ val filePath = "delete-this.txt";
+ val noFile = "no-file.txt"
+ val file = new File(filePath);
+ val created = file.createNewFile();
+ created should be(true);
+ CommonUtil.deleteFile(filePath)
+ CommonUtil.deleteFile(noFile)
+ file.isFile() should be(false);
+
+ //getAge
+ val dateformat = new SimpleDateFormat("dd/MM/yyyy");
+ val dob = dateformat.parse("04/07/1990");
+ CommonUtil.getAge(dob) should be > (25)
+
+ //getDatesBetween
+ CommonUtil.getDatesBetween("2016-01-01", Option("2016-01-04")) should be(Array("2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04"))
+ CommonUtil.getDatesBetween("2016-01-01", None) should not be null;
+
+ //getEvent
+ val line = "{\"eid\":\"OE_START\",\"ts\":\"2016-01-01T12:13:20+05:30\",\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
+ val event = JSONUtils.deserialize[Event](line);
+ val line2 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
+ val event2 = JSONUtils.deserialize[Event](line2);
+ val line3 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22+05:30\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
+ val event3 = JSONUtils.deserialize[Event](line3);
+ val line4 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22P:ST\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
+ val event4 = JSONUtils.deserialize[Event](line4);
+ val line5 = "{\"eid\":\"OE_START\",\"ets\":1451630600000,\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}";
+ val event5 = JSONUtils.deserialize[Event](line5);
+
+ //getEventDate yyyy-MM-dd'T'HH:mm:ssZZ
+ val evDate = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZZ").parseLocalDate("2016-01-01T12:13:20+05:30").toDate;
+ CommonUtil.getEventDate(event) should be(evDate)
+
+ //getEventTs
+ CommonUtil.getEventTS(event) should be(1451630600000L)
+ CommonUtil.getEventTS(event5) should be(1451630600000L)
+ CommonUtil.getEventSyncTS(event) should be(1451696362924L)
+ CommonUtil.getEventSyncTS(event2) should be(0L)
+ CommonUtil.getEventSyncTS(event3) should be(1451676562000L)
+ CommonUtil.getEventSyncTS(event4) should be(1451696362000L)
+
+ CommonUtil.getEventTS(event2) should be(0)
+
+ CommonUtil.getEventDate(event2) should be(null)
+
+ //getGameId
+ CommonUtil.getGameId(event) should be("org.ekstep.aser.lite")
+ CommonUtil.getGameId(event2) should be(null)
+
+ //getGameVersion
+ CommonUtil.getGameVersion(event) should be("5.7")
+ CommonUtil.getGameVersion(event2) should be(null)
+
+ //getHourOfDay
+ CommonUtil.getHourOfDay(1447154514000L, 1447158114000L) should be(ListBuffer(11, 12))
+ CommonUtil.getHourOfDay(1447154514000L, 1447000L) should be(ListBuffer(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0))
+
+ //getParallelization
+ val config = new JobConfig(null, None, None, null, None, None, Option(10), Option("testApp"), Option(false));
+ CommonUtil.getParallelization(config) should be(10)
+
+ val config2 = new JobConfig(null, None, None, null, None, None, None, Option("testApp"), Option(false));
+ CommonUtil.getParallelization(config2) should be(10)
+
+ //getParallelization (from a config map)
+ val con = Option(Map("search" -> null, "filters" -> null, "sort" -> null, "model" -> null, "modelParams" -> null, "output" -> null, "parallelization" -> "10", "appName" -> "testApp", "deviceMapping" -> null))
+ CommonUtil.getParallelization(con) should be(10)
+
+ //getStartDate
+ CommonUtil.getStartDate(Option("2016-01-08"), 7) should be(Option("2016-01-01"))
+ CommonUtil.getStartDate(None, 0) should be(Option(LocalDate.fromDateFields(new Date).toString()))
+
+ //getTimeDiff
+ CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d))
+ CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d))
+
+ CommonUtil.getTimeDiff(event, event) should be(Option(0d))
+ CommonUtil.getTimeDiff(event, event2) should be(Option(0d))
+
+ //getTimeSpent
+ CommonUtil.getTimeSpent("10") should be(Option(10d))
+ CommonUtil.getTimeSpent(10d.asInstanceOf[AnyRef]) should be(Option(10d))
+ CommonUtil.getTimeSpent(10.asInstanceOf[AnyRef]) should be(Option(10d))
+ CommonUtil.getTimeSpent(null) should be(Option(0d))
+ CommonUtil.getTimeSpent(true.asInstanceOf[AnyRef]) should be(Option(0d))
+
+ CommonUtil.getTimestamp("2016-01-02T00:59:22+P:ST") should be(1451696362000L);
+
+ CommonUtil.roundDouble(12.7345, 2) should be(12.73);
+
+ //gzip
+ val testPath = "src/test/resources/sample_telemetry.log";
+ CommonUtil.gzip(testPath)
+ new File("src/test/resources/sample_telemetry.log.gz").isFile() should be(true)
+ CommonUtil.deleteFile("src/test/resources/sample_telemetry.log.gz");
+
+ a[Exception] should be thrownBy {
+ CommonUtil.gzip("src/test/resources/sample_telemetry.txt")
+ }
+
+ CommonUtil.getParallelization(None) should be(10);
+
+ CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L)) should be("1D99B2F1C6637AE21081CD981AFFB56F");
+ CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "org.ekstep.aser.lite") should be("6D5DCB288B1A9BC3036D04C37FF08EDF");
+
+ CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "content1", Option("app1"), Option("channel1"), "device1") should be("4DE94D28FB211D935B70DADBEB8B45EA");
+ CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "content1", None, None, "device1") should be("B5D001443E9BEFF7884FFB1F9B2A5CAD");
+
+ CommonUtil.getMessageId("ME_TEST", "INFO", 1451650400000L, Option("sunbird.app"), None) should be("C0D5CA578D9F8889CDB2C09FF4899FAC");
+ CommonUtil.getMessageId("ME_TEST", "INFO", 1451650400000L, None, Option("testchannel")) should be("6625F709DD90A7423F0332826DE0F386");
+
+ CommonUtil.getMessageId("ME_TEST", "123", "MONTH", 1451650400000L, None, None) should be("D0BF57F856E3B7FAD5E47CCD4B31DE57");
+
+ val res = CommonUtil.time({
+
+ CommonUtil.getWeeksBetween(1451650400000L, 1454650400000L) should be(5)
+ CommonUtil.getPeriod(1451650400000L, DAY) should be(20160101)
+ CommonUtil.getPeriod(1451650400000L, WEEK) should be(2015753)
+ CommonUtil.getPeriod(1452250748000L, WEEK) should be(2016701)
+ CommonUtil.getPeriod(1451650400000L, MONTH) should be(201601)
+ CommonUtil.getPeriod(1451650400000L, CUMULATIVE) should be(0)
+ CommonUtil.getPeriod(1451650400000L, LAST7) should be(7)
+ CommonUtil.getPeriod(1451650400000L, LAST30) should be(30)
+ CommonUtil.getPeriod(1451650400000L, LAST90) should be(90)
+ CommonUtil.getPeriod(new DateTime("2016-01-01"), DAY) should be(20160101)
+
+ })
+ res._1 should be > (0L)
+
+ //getTags
+ val metaData1 = Map("tags" -> List("test", "QA"), "activation_keys" -> "ptm007")
+ val tags1 = CommonUtil.getTags(metaData1).get
+ tags1.length should be(2)
+
+ val metaData2 = Map("activation_keys" -> "ptm007", "tags" -> null)
+ val tags2 = CommonUtil.getTags(metaData2).get
+ tags2.length should be(0)
+
+ val metaData3 = Map("activation_keys" -> "ptm007")
+ val tags3 = CommonUtil.getTags(metaData3).get
+ tags3.length should be(0)
+
+ CommonUtil.daysBetween(new DateTime(1451650400000L).toLocalDate(), new DateTime(1454650400000L).toLocalDate()) should be(35);
+ } catch {
+ case ex: Exception => ex.printStackTrace();
+ }
+
+ CommonUtil.getPathFromURL("https://ekstep-public.s3-ap-southeast-1.amazonaws.com/ecar_files/domain_38527_1460631037666.ecar") should be("/ecar_files/domain_38527_1460631037666.ecar")
+
+ // getPeriods
+ val daysArray = CommonUtil.getPeriods(DAY, 5)
+ daysArray.length should be(5)
+
+ val weeksArray = CommonUtil.getPeriods(WEEK, 5)
+ weeksArray.length should be(5)
+
+ val monthsArray = CommonUtil.getPeriods(MONTH, 5)
+ monthsArray.length should be(5)
+
+ val cumArray = CommonUtil.getPeriods(CUMULATIVE, 5)
+ cumArray.length should be(1)
+
+ CommonUtil.getPeriods("DAY", 5)
+ CommonUtil.getPeriods("WEEK", 5)
+ CommonUtil.getPeriods("MONTH", 5)
+ CommonUtil.getPeriods("CUMULATIVE", 5)
+
+ //zip
+ CommonUtil.zip("src/test/resources/test.zip", List("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry_2.log"))
+ new File("src/test/resources/test.zip").isFile() should be(true)
+ CommonUtil.deleteFile("src/test/resources/test.zip");
+ //zip folder
+ //CommonUtil.zipFolder("src/test/resources/zipFolderTest.zip", "src/test/resources/1234/OE_INTERACT")
+ //new File("src/test/resources/zipFolderTest.zip").isFile() should be(true)
+ //CommonUtil.deleteFile("src/test/resources/zipFolderTest.zip");
+
+ //ccToMap
+ val x = CommonUtil.caseClassToMap(DerivedEvent)
+
+ //zip dir
+ CommonUtil.zipDir("src/test/resources/test.zip", "src/test/resources/1234")
+ new File("src/test/resources/test.zip").isFile() should be(true)
+ CommonUtil.deleteFile("src/test/resources/test.zip");
+
+ //getChannelId
+ val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"sunbird\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
+ val channelId = CommonUtil.getChannelId(JSONUtils.deserialize[Event](event))
+ channelId should be("sunbird")
+
+ CommonUtil.getChannelId(JSONUtils.deserialize[Event]("{\"eid\":\"OE_INTERACT\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}")) should be("in.ekstep")
+
+ val drivedEvent = "{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}"
+ val channelId1 = CommonUtil.getChannelId(JSONUtils.deserialize[DerivedEvent](drivedEvent))
+ channelId1 should be("in.ekstep")
+
+ val profileEvent = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}"
+ val channelId2 = CommonUtil.getChannelId(JSONUtils.deserialize[ProfileEvent](profileEvent))
+ channelId2 should be("in.ekstep")
+
+ CommonUtil.getChannelId("") should be("in.ekstep")
+
+ CommonUtil.getChannelId(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", Option("2.0"))), null, None, None, None, None), None, null)) should be("in.ekstep")
+ CommonUtil.getChannelId(new V3Event(null, 0l, null, null, null, null, V3Context("sunbird", Option(V3PData("sunbird.app", None)), null, None, None, None, None), None, null)) should be("sunbird")
+ CommonUtil.getChannelId(DerivedEvent(null, 0l, 0l, null, null, null, "sunbird", None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0"))), null)) should be("sunbird")
+ CommonUtil.getChannelId(DerivedEvent(null, 0l, 0l, null, null, null, "sunbird", None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0")), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, Option("sunbird")), null)) should be("sunbird")
+ CommonUtil.getChannelId(new ProfileEvent(null, null, null, null, null, null, null, null, Option(new PData("sunbird.app", "2.0")), Option("sunbird"), null)) should be("sunbird")
+
+ // getAppDetails
+ val event1 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
+ val appId = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event1))
+ appId.id should be("genie")
+
+ val event2 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"pdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
+ val appId3 = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event2))
+ appId3.id should be("org.ekstep.story.en.haircut")
+
+ val drivedEvent1 = "{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}"
+ val appId1 = CommonUtil.getAppDetails(JSONUtils.deserialize[DerivedEvent](drivedEvent1))
+ appId1.id should be("genie")
+
+ val profileEvent1 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}"
+ val appId2 = CommonUtil.getAppDetails(JSONUtils.deserialize[ProfileEvent](profileEvent1))
+ appId2.id should be("genie")
+
+ CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", Option("2.0"))), null, None, None, None, None), None, null)).id should be("sunbird.app")
+ CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", None)), null, None, None, None, None), None, null)).id should be("sunbird.app")
+ CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, None, null, None, None, None, None), None, null)).id should be("genie")
+
+ CommonUtil.getAppDetails(new ProfileEvent(null, null, null, null, null, null, null, null, Option(new PData("sunbird.app", "2.0")), None, null)).id should be("sunbird.app")
+ CommonUtil.getAppDetails(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0"))), null)).id should be("sunbird.app")
+ CommonUtil.getAppDetails("").id should be("genie");
+
+ //getEndTimestampOfDay
+ val time = CommonUtil.getEndTimestampOfDay("2016-01-02")
+ time.toString() should be("1451759399000")
+
+ // dayPeriodToLong
+ val dayPeriodToLong = CommonUtil.dayPeriodToLong(20170713)
+ dayPeriodToLong.toString should be("1499904000000")
+
+ // getWeeksBetween
+ val getWeeksBetween = CommonUtil.getWeeksBetween(1499904L, 1451759399L)
+ getWeeksBetween should be(2)
+
+ // getMetricEvent
+ val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> "test", "metrics" -> List(V3MetricEdata("count", "100".asInstanceOf[AnyRef]))), "pipeline-monitoring", "dataproduct-metric")
+ metricEvent.context.pdata.get.id should be("pipeline-monitoring")
+ metricEvent.context.pdata.get.pid.get should be("dataproduct-metric")
+
+ val epochToTimestamp = CommonUtil.getTimestampFromEpoch(1537550355883L)
+ epochToTimestamp.toString should be("2018-09-21 17:19:15.883")
+
+ val connectionProperties = CommonUtil.getPostgresConnectionProps()
+ connectionProperties.getProperty("user") should be("postgres")
+ connectionProperties.getProperty("password") should be("postgres")
+ connectionProperties.getProperty("driver") should be("org.postgresql.Driver")
+
+ implicit val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"))
+ val defaultCaseConf = CommonUtil.setStorageConf("local", Option(""), Option(""))
+
+ val azureStorageConf = CommonUtil.setStorageConf("azure", Option("azure_storage_key"), Option("azure_storage_secret"))
+ azureStorageConf.get("fs.azure") should be("org.apache.hadoop.fs.azure.NativeAzureFileSystem")
+ azureStorageConf.get("fs.azure.account.key.azure-test-key.blob.core.windows.net") should be("azure-test-secret")
+
+ val s3StorageConf = CommonUtil.setStorageConf("s3", Option("aws_storage_key"), Option("aws_storage_secret"))
+ s3StorageConf.get("fs.s3n.awsAccessKeyId") should be("aws-test-key")
+ s3StorageConf.get("fs.s3n.awsSecretAccessKey") should be("aws-test-secret")
+
+ val fileUtil = new HadoopFileUtil;
+ val copiedFile = fileUtil.copy("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry.json", sc.hadoopConfiguration)
+ sc.textFile(copiedFile, 1).count() should be (7437)
+ fileUtil.delete(sc.hadoopConfiguration, copiedFile)
+
+ sc.stop()
+ }
+
+ it should "test all the exception branches" in {
+
+ noException should be thrownBy {
+ val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"));
+ sc.stop();
+ }
+
+ noException should be thrownBy {
+ val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), Option("10.0.0.0"), Option("2"));
+ sc.stop();
+ }
+
+ noException should be thrownBy {
+ val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), Option("Quorum"))
+ sc.stop();
}
+
+ noException should be thrownBy {
+ val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), Option("Quorum"), Option("10.0.0.0"), Option("2"))
+ sc.stop();
+ }
+
+ noException should be thrownBy {
+ val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), None)
+ sc.stop();
+ }
+
+ val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}"
+ val v3Event = JSONUtils.deserialize[V3Event](event);
+ CommonUtil.getEventSyncTS(v3Event) should be(1478679395699l);
+
+ CommonUtil.getFrameworkContext(None) should not be (null)
+
+ noException should be thrownBy {
+ CommonUtil.deleteDirectory("src/test/resources/abcdefg")
+ }
+
+ CommonUtil.createDirectory("src/test/resources/abcdefg")
+ val f = new File("src/test/resources/abcdefg")
+ f.exists() should be(true)
+ CommonUtil.deleteDirectory("src/test/resources/abcdefg")
+
+ CommonUtil.getValidTagsForWorkflow(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, null, null, None, Option(List("tag1", "tag2"))), Array("tag1")).head should be("tag1")
+ CommonUtil.getValidTagsForWorkflow(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, null, null, None, None), Array("tag1")).size should be(0)
+
+ val map = CommonUtil.caseClassToMapWithDateConversion(TestCaseClass("mid1", DateTime.now()))
+ map.get("mid").get should be("mid1");
+
+ CommonUtil.dayPeriodToLong(2020) should be(0)
+
+ CommonUtil.getTimestampOfDayPeriod(20200101) should be(1577836800000l)
+
+ CommonUtil.avg(List(3, 4, 5)) should be(4)
+
+ DateTimeUtils.setCurrentMillisFixed(1577836800000L);
+ CommonUtil.getIntervalRange("LastDay", "telemetry-rollup-syncts") should be("2019-12-31T00:00:00+00:00/2020-01-01T00:00:00+00:00")
+ CommonUtil.getIntervalRange("LastDay", "summary-rollup-syncts") should be("2019-12-31T00:00:00+00:00/2020-01-01T00:00:00+00:00")
+ CommonUtil.getIntervalRange("LastWeek","telemetry-rollup-syncts") should be("2019-12-23T05:30:00+00:00/2019-12-30T05:30:00+00:00")
+ CommonUtil.getIntervalRange("LastMonth","telemetry-rollup-syncts") should be("2019-12-01T05:30:00+00:00/2020-01-01T05:30:00+00:00")
+ CommonUtil.getIntervalRange("Last7Days", "telemetry-rollup-syncts") should be("2019-12-25T00:00:00+00:00/2020-01-01T00:00:00+00:00")
+ CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts") should be("2019-12-02T00:00:00+00:00/2020-01-01T00:00:00+00:00")
+ CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts", 0) should be("2019-12-02T00:00:00+00:00/2020-01-01T00:00:00+00:00")
+ CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts", 2) should be("2019-11-30T00:00:00+00:00/2019-12-30T00:00:00+00:00")
+ CommonUtil.getIntervalRange("Last60Days", "telemetry-rollup-syncts") should be("Last60Days")
+ DateTimeUtils.setCurrentMillisSystem();
+
+ CommonUtil.getGranularity("") should be(GranularityType.All)
+
+ val eventListener = new TestEventListener();
+ EventBusUtil.register(eventListener)
+ EventBusUtil.dipatchEvent("Test Event");
+ eventListener.event should be("Test Event")
+ }
+
}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala
new file mode 100644
index 00000000..1ea84eee
--- /dev/null
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala
@@ -0,0 +1,92 @@
+package org.ekstep.analytics.framework.util
+
+import org.ekstep.analytics.framework._
+import org.joda.time.LocalDate
+import java.io.File
+
+import org.joda.time.DateTime
+import java.util.Date
+import java.text.SimpleDateFormat
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.mutable.ListBuffer
+import org.joda.time.format.DateTimeFormat
+import org.ekstep.analytics.framework.Period._
+import org.apache.spark.sql.Encoders
+import org.ekstep.analytics.framework.util.DatasetUtil.extensions
+import org.apache.hadoop.fs.azure.AzureException
+import org.apache.hadoop.fs.s3.S3Exception
+import org.apache.spark.sql.functions.col
+
+class TestDatasetUtil extends BaseSpec {
+
+ "DatasetUtil" should "test the dataset extensions" in {
+
+ val fileUtil = new HadoopFileUtil();
+ val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None);
+ val rdd = sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1);
+
+ import sparkSession.implicits._
+ val df = sparkSession.createDataFrame(rdd);
+ df.saveToBlobStore(StorageConfig("local", null, "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env")));
+
+ val rdd2 = sparkSession.sparkContext.textFile("src/test/resources/test-report/env1.csv", 1).collect();
+ rdd2.head should be ("time_spent,count")
+ rdd2.last should be ("32.1,4")
+
+ df.saveToBlobStore(StorageConfig("local", null, "src/test/resources"), "csv", "test-report2", None, None);
+ val rdd3 = sparkSession.sparkContext.textFile("src/test/resources/test-report2.csv", 1).collect();
+ rdd3.head should be ("env1,22.1,3")
+ rdd3.last should be ("env1,32.1,4")
+
+ fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "src/test/resources/test-report", "src/test/resources/test-report2", "src/test/resources/test-report2.csv");
+ sparkSession.stop();
+ }
+
+ it should "test exception branches" in {
+
+ val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None);
+ val rdd = sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1);
+
+ import sparkSession.implicits._
+ val df = sparkSession.createDataFrame(rdd);
+ a[AzureException] should be thrownBy {
+ df.saveToBlobStore(StorageConfig("azure", "test-container", "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env")));
+ }
+
+ a[S3Exception] should be thrownBy {
+ df.saveToBlobStore(StorageConfig("s3", "test-container", "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env")));
+ }
+
+ sparkSession.stop();
+ }
+
+ "DatasetUtil" should "test the dataset copy functionality" in {
+
+ val fileUtil = new HadoopFileUtil();
+ val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None);
+ val rdd = sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1);
+
+ val tempDir = "src/test/resources/test-report/_tmp"
+
+ val partitioningColumns = Option(Seq("env"));
+ val dims = partitioningColumns.getOrElse(Seq());
+ val options = Option(Map("header" -> "true"))
+ val df = sparkSession.createDataFrame(rdd);
+ val conf = sparkSession.sparkContext.hadoopConfiguration
+ val filePrefix = ""
+ val format = "csv"
+ val srcFS = new Path("src/test/resources/test-report/_tmp/env=env1")
+ val srcDir = srcFS.getFileSystem(conf)
+ fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "" + tempDir)
+ val opts = options.getOrElse(Map());
+ df.coalesce(1).write.format(format).options(opts).partitionBy(dims: _*).save(filePrefix + tempDir);
+ fileUtil.copyMerge("" + "src/test/resources/test-report/_tmp/env=env1", "src/test/resources/test-report/env2.csv", sparkSession.sparkContext.hadoopConfiguration, false);
+ srcDir.delete(new Path("src/test/resources/test-report/_tmp/env=env1"), true)
+ fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "src/test/resources/test-report", "src/test/resources/test-report2", "src/test/resources/test-report2.csv");
+ fileUtil.copyMerge("" + "src/test/resources/test-report/_tmp/env=env1", "src/test/resources/test-report/env2.csv", sparkSession.sparkContext.hadoopConfiguration, false);
+ sparkSession.stop();
+
+ }
+}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala
index a85625d8..81174803 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala
@@ -44,6 +44,8 @@ class TestJobLogger extends BaseSpec {
JobLogger.log("testing warn method", None, WARN);
JobLogger.log("testing error method", None, ERROR);
+ JobLogger.logEvent("test event method", "org.ekstep.analytics", WARN)
+ JobLogger.logEvent("test event method", "org.ekstep.analytics", DEBUG)
}
}
\ No newline at end of file
diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala
index c9130780..f78927b5 100644
--- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala
+++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala
@@ -1,16 +1,18 @@
package org.ekstep.analytics.framework.util
+import akka.actor.ActorSystem
+import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpMethods, HttpRequest}
import org.ekstep.analytics.framework.BaseSpec
import org.ekstep.analytics.framework.Metadata
import org.ekstep.analytics.framework.Request
import org.ekstep.analytics.framework.Response
import org.ekstep.analytics.framework.Search
import org.ekstep.analytics.framework.SearchFilter
-
import com.fasterxml.jackson.core.JsonParseException
import org.ekstep.analytics.framework.conf.AppConf
import org.ekstep.analytics.framework.Params
import com.google.common.net.InetAddresses
+import org.ekstep.analytics.framework.fetcher.AkkaHttpUtil
/**
* @author Santhosh
@@ -34,16 +36,20 @@ class TestRestUtil extends BaseSpec {
val url = "https://httpbin.org/xml";
val response = RestUtil.get[GetR](url);
response should be(null);
+
+ val url2 = "https://httpbin.org/xml";
+ val response2 = RestUtil.get[String](url2, Option(Map("Content-Type" -> "application/json")));
+ response2 should not be(null);
}
// TODO: Need to fix the Test cases with proper request
- /*it should "execute POST and parse response" in {
+ it should "execute POST and parse response" in {
val url = "https://httpbin.org/post?type=test";
- val response = RestUtil.post[PostR](url, "");
+ val response = RestUtil.post[PostR](url, "", Option(Map("accept" -> "application/json")));
response should not be null;
response.url should be("https://httpbin.org/post?type=test");
InetAddresses.isInetAddress(response.origin) should be(true);
- } */
+ }
it should "throw Exception if unable to parse the response during POST" in {
val url = "https://httpbin.org/post?type=test";
@@ -53,16 +59,43 @@ class TestRestUtil extends BaseSpec {
}
// TODO: Need to fix the Test cases with proper request
- /*it should "execute PATCH and parse response" in {
+ it should "execute PATCH and parse response" in {
val url = "https://httpbin.org/patch?type=test";
val request = Map("popularity" -> 1);
- val response = RestUtil.patch[PostR](url, JSONUtils.serialize(request));
+ val response = RestUtil.patch[PostR](url, JSONUtils.serialize(request), Option(Map("accept" -> "application/json")));
response should not be null;
response.url should be("https://httpbin.org/patch?type=test");
InetAddresses.isInetAddress(response.origin) should be(true);
response.data should be("{\"popularity\":1}");
response.json.get("popularity").get should be(1);
- } */
+ }
+
+ it should "execute PUT and parse response" in {
+ val url = "https://httpbin.org/put?type=test";
+ val request = Map("popularity" -> 1);
+ val response = RestUtil.put[PostR](url, JSONUtils.serialize(request), Option(Map("accept" -> "application/json")));
+ response should not be null;
+ response.url should be("https://httpbin.org/put?type=test");
+ InetAddresses.isInetAddress(response.origin) should be(true);
+ response.data should be("{\"popularity\":1}");
+ response.json.get("popularity").get should be(1);
+
+ val url2 = "https://httpbin.org/put?type=test";
+ val response2 = RestUtil.put[PostErrR](url2, JSONUtils.serialize(request));
+ response2 should be(null);
+ }
+
+ it should "execute Delete and parse response" in {
+ val url = "https://httpbin.org/delete";
+ val response = RestUtil.delete[PostR](url, Option(Map("accept" -> "application/json")));
+ response should not be null;
+ response.url should be("https://httpbin.org/delete");
+ InetAddresses.isInetAddress(response.origin) should be(true);
+
+ val url2 = "https://httpbin.org/delete/xml";
+ val response2 = RestUtil.delete[PostErrR](url2, None);
+ response2 should be (null);
+ }
it should "throw Exception if unable to parse the response during PATCH" in {
val url = "https://httpbin.org/patch?type=test";
@@ -71,4 +104,15 @@ class TestRestUtil extends BaseSpec {
response should be(null);
}
+ it should "should test akka util" in {
+ val url = "https://httpbin.org/patch?type=test";
+ implicit val system = ActorSystem("Test")
+ val request = HttpRequest(method = HttpMethods.POST,
+ uri = url,
+ entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(Map("popularity" -> 1))))
+ val response = AkkaHttpUtil.sendRequest(request)
+ response should not be (null);
+ system.terminate()
+ }
+
}
\ No newline at end of file
diff --git a/analytics-job-driver/pom.xml b/analytics-job-driver/pom.xml
index a4db3ba0..7a83b08d 100644
--- a/analytics-job-driver/pom.xml
+++ b/analytics-job-driver/pom.xml
@@ -42,13 +42,23 @@
commons-codec
commons-codec
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-client
+ 2.7.3
+ provided
+
org.apache.spark
spark-streaming-kafka_${scala.maj.version}
- 1.6.2
+ 1.6.3
org.apache.kafka
@@ -64,19 +74,13 @@
org.scalatest
scalatest_${scala.maj.version}
- 2.2.4
+ 3.0.5
test
-
- analytics-framework-1.0
+ analytics-framework-2.0
src/main/scala
src/test/scala
diff --git a/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala b/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala
index b4dd9ea4..564f66b3 100644
--- a/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala
+++ b/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala
@@ -36,7 +36,9 @@ object BatchJobDriver {
}
val autocloseSC = if (sc.isEmpty) true else false;
val frameworkContext = if (fc.isEmpty) {
- CommonUtil.getFrameworkContext(Option(Array((AppConf.getConfig("cloud_storage_type"), AppConf.getConfig("cloud_storage_type"), AppConf.getConfig("cloud_storage_type")))));
+ val storageKey = config.modelParams.getOrElse(Map()).getOrElse("storageKeyConfig", "azure_storage_key").asInstanceOf[String]
+ val storageSecret = config.modelParams.getOrElse(Map()).getOrElse("storageSecretConfig", "azure_storage_secret").asInstanceOf[String]
+ CommonUtil.getFrameworkContext(Option(Array((AppConf.getConfig("cloud_storage_type"), storageKey, storageSecret))));
} else {
fc.get
}
@@ -57,33 +59,43 @@ object BatchJobDriver {
private def _process[T, R](config: JobConfig, models: List[IBatchModel[T, R]])(implicit mf: Manifest[T], mfr: Manifest[R], sc: SparkContext, fc: FrameworkContext) {
- val rdd = DataFetcher.fetchBatchData[T](config.search).cache();
- val count = rdd.count;
+ fc.inputEventsCount = sc.longAccumulator("InputEventsCount");
+ fc.outputEventsCount = sc.longAccumulator("OutputEventsCount");
+ val rdd = DataFetcher.fetchBatchData[T](config.search);
val data = DataFilter.filterAndSort[T](rdd, config.filters, config.sort);
models.foreach { model =>
- JobContext.jobName = model.name
// TODO: It is not necessary that the end date always exists. The below log statement might throw exceptions
+ // $COVERAGE-OFF$
+ fc.outputEventsCount.reset();
val endDate = config.search.queries.getOrElse(Array(Query())).last.endDate
- JobLogger.start("Started processing of " + model.name, Option(Map("config" -> config, "model" -> model.name, "date" -> endDate)));
+ // $COVERAGE-ON$
+ val modelName = if(config.modelParams.nonEmpty && config.modelParams.get.get("modelName").nonEmpty)
+ config.modelParams.get.get("modelName").get.asInstanceOf[String]
+ else model.name
+ JobContext.jobName = modelName
+ JobLogger.start("Started processing of " + modelName, Option(Map("config" -> config, "model" -> model.name, "date" -> endDate)));
try {
val result = _processModel(config, data, model);
// generate metric event and push it to kafka topic
- val date = if (endDate.isEmpty) new DateTime().toString(CommonUtil.dateFormat) else endDate.get
- val metrics = List(V3MetricEdata("date", date.asInstanceOf[AnyRef]), V3MetricEdata("inputEvents", count.asInstanceOf[AnyRef]),
- V3MetricEdata("outputEvents", result._2.asInstanceOf[AnyRef]), V3MetricEdata("timeTakenSecs", Double.box(result._1 / 1000).asInstanceOf[AnyRef]))
- val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> model.name, "metrics" -> metrics), AppConf.getConfig("metric.producer.id"), AppConf.getConfig("metric.producer.pid"))
+ val metrics = List(Map("id" -> "input-events", "value" -> fc.inputEventsCount.value.asInstanceOf[AnyRef]), Map("id" -> "output-events", "value" -> result._2.asInstanceOf[AnyRef]), Map("id" -> "time-taken-secs", "value" -> Double.box(result._1 / 1000).asInstanceOf[AnyRef]))
+ val metricEvent = getMetricJson(model.name, endDate, "SUCCESS", metrics)
+ // $COVERAGE-OFF$
if (AppConf.getConfig("push.metrics.kafka").toBoolean)
- KafkaDispatcher.dispatch(Array(JSONUtils.serialize(metricEvent)), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker")))
+ KafkaDispatcher.dispatch(Array(metricEvent), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker")))
+ // $COVERAGE-ON$
- JobLogger.end(model.name + " processing complete", "SUCCESS", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> count, "outputEvents" -> result._2, "timeTaken" -> Double.box(result._1 / 1000))));
+ JobLogger.end(modelName + " processing complete", "SUCCESS", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> fc.inputEventsCount.value, "outputEvents" -> result._2, "timeTaken" -> Double.box(result._1 / 1000))));
} catch {
case ex: Exception =>
JobLogger.log(ex.getMessage, None, ERROR);
- JobLogger.end(model.name + " processing failed", "FAILED", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> count, "statusMsg" -> ex.getMessage)));
+ JobLogger.end(modelName + " processing failed", "FAILED", Option(Map("model" -> model.name, "date" -> endDate, "statusMsg" -> ex.getMessage)));
+ val metricEvent = getMetricJson(model.name, endDate, "FAILED", List(Map("id" -> "input-events", "value" -> fc.inputEventsCount.value.asInstanceOf[AnyRef])))
+ // $COVERAGE-OFF$
+ if (AppConf.getConfig("push.metrics.kafka").toBoolean)
+ KafkaDispatcher.dispatch(Array(metricEvent), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker")))
+ // $COVERAGE-ON$
ex.printStackTrace();
- } finally {
- rdd.unpersist()
}
}
}
@@ -92,11 +104,18 @@ object BatchJobDriver {
CommonUtil.time({
val output = model.execute(data, config.modelParams);
- // JobContext.recordRDD(output);
val count = OutputDispatcher.dispatch(config.output, output);
- // JobContext.cleanUpRDDs();
- count;
+ fc.outputEventsCount.value
})
}
+
+ def getMetricJson(subsystem: String, endDate: Option[String], status: String, metrics: List[Map[String, AnyRef]]): String = {
+ // $COVERAGE-OFF$
+ val date = if (endDate.isEmpty) new DateTime().toString(CommonUtil.dateFormat) else endDate.get
+ // $COVERAGE-ON$
+ val dims = List(Map("id" -> "report-date", "value" -> date), Map("id" -> "status", "value" -> status))
+ val metricEvent = Map("metricts" -> System.currentTimeMillis(), "system" -> "DataProduct", "subsystem" -> subsystem, "metrics" -> metrics, "dimensions" -> dims)
+ JSONUtils.serialize(metricEvent)
+ }
}
\ No newline at end of file
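
Note: the payload that getMetricJson serializes has the shape sketched below. This is illustrative only (the subsystem name and values are made up); it is just the map built in the method above, written out with example data.

    // Illustrative sketch of the map getMetricJson passes to JSONUtils.serialize.
    // Values are examples; "subsystem" carries model.name at runtime.
    Map(
      "metricts"   -> System.currentTimeMillis(),
      "system"     -> "DataProduct",
      "subsystem"  -> "TestModel",
      "metrics"    -> List(
        Map("id" -> "input-events",    "value" -> 1000L),    // fc.inputEventsCount.value
        Map("id" -> "output-events",   "value" -> 950L),     // result._2
        Map("id" -> "time-taken-secs", "value" -> 12.0)),    // result._1 / 1000
      "dimensions" -> List(
        Map("id" -> "report-date", "value" -> "2020-01-01"), // endDate, or today when absent
        Map("id" -> "status",      "value" -> "SUCCESS")))
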
diff --git a/analytics-job-driver/src/test/resources/application.conf b/analytics-job-driver/src/test/resources/application.conf
index 759fae4b..41ac850e 100644
--- a/analytics-job-driver/src/test/resources/application.conf
+++ b/analytics-job-driver/src/test/resources/application.conf
@@ -19,4 +19,6 @@ metric.producer.id="pipeline.monitoring"
metric.producer.pid="dataproduct.metrics"
push.metrics.kafka=false
metric.kafka.broker="localhost:9092"
-metric.kafka.topic="metric"
\ No newline at end of file
+metric.kafka.topic="metric"
+
+cloud_storage_type=azure
\ No newline at end of file
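
Note: cloud_storage_type is the value the driver reads when it has to build its own FrameworkContext (see the BatchJobDriver hunk above). A minimal sketch of that path follows, assuming the storage-key lookup mirrors the storage-secret lookup shown in the diff (the "storageKeyConfig"/"azure_storage_key" names are an assumption):

    // Sketch: the tuple passed to getFrameworkContext is (storage type, key-config name, secret-config name).
    val storageKey    = config.modelParams.getOrElse(Map()).getOrElse("storageKeyConfig", "azure_storage_key").asInstanceOf[String]       // assumed, by symmetry with the secret
    val storageSecret = config.modelParams.getOrElse(Map()).getOrElse("storageSecretConfig", "azure_storage_secret").asInstanceOf[String]
    implicit val fc = CommonUtil.getFrameworkContext(
      Option(Array((AppConf.getConfig("cloud_storage_type"), storageKey, storageSecret))))  // "azure" in this test config
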
diff --git a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala
index 3f5e633e..fbc9d819 100644
--- a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala
+++ b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala
@@ -24,7 +24,7 @@ object TestModel2 extends IBatchModel[MeasuredEvent, String] with Serializable {
object TestModel3 extends IBatchModel[MeasuredEvent, String] with Serializable {
- def execute(events: RDD[MeasuredEvent], jobParams: Option[Map[String, AnyRef]])(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = {
+ def execute(events: RDD[MeasuredEvent], jobParams: Option[Map[String, AnyRef]])(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = {
val contents = events.map { x => x.content_id.getOrElse("") }
contents;
}
@@ -52,7 +52,7 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll {
Option(Array[Filter](Filter("eventId", "IN", Option(Array("OE_ASSESS", "OE_START", "OE_END", "OE_LEVEL_SET"))))),
None,
"org.ekstep.analytics.framework.TestModel",
- Option(Map()),
+ Option(Map("modelName" -> "TestModelJob")),
Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))),
Option(8),
None,
@@ -176,6 +176,7 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll {
JobDriver.run[MeasuredEvent, String]("batch", JSONUtils.serialize(jobConfig), models, "TestMergeJobs");
CommonUtil.closeSparkContext()(sc.get);
}
+
}
it should "run the stream job driver on multiple models" in {
@@ -198,4 +199,5 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll {
JobDriver.run("batch", JSONUtils.serialize(""), models, "TestMergeJobs");
}
}
-}
\ No newline at end of file
+}
+
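
Note: the modelParams entry added above ("modelName" -> "TestModelJob") exercises the new name override in BatchJobDriver. Condensed, the resolution amounts to:

    // Condensed from the BatchJobDriver hunk: use modelParams("modelName") when present, else model.name.
    val modelName =
      config.modelParams
        .flatMap(_.get("modelName"))
        .map(_.asInstanceOf[String])
        .getOrElse(model.name)        // "TestModelJob" for the config above
    JobContext.jobName = modelName
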
diff --git a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala
new file mode 100644
index 00000000..97c7b546
--- /dev/null
+++ b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala
@@ -0,0 +1,49 @@
+package org.ekstep.analytics.framework
+
+import org.scalatest.FlatSpec
+import org.scalatest.Matchers
+import org.scalatest.BeforeAndAfterAll
+import org.ekstep.analytics.framework.util.CommonUtil
+import org.ekstep.analytics.framework.util.JSONUtils
+import org.apache.spark.rdd.RDD
+import org.apache.spark.SparkContext
+
+case class Dummy(event:String) extends AlgoInput with AlgoOutput with Output
+object TestModel4 extends IBatchModelTemplate[Event, Dummy, Dummy, Dummy] with Serializable {
+
+ override def preProcess(events: RDD[Event], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = {
+ events.map { x => Dummy(JSONUtils.serialize(x)) };
+ }
+
+ override def algorithm(events: RDD[Dummy], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = {
+ events
+ }
+
+ override def postProcess(events: RDD[Dummy], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = {
+ events
+ }
+
+ override def name: String = "TestModel4";
+}
+
+class TestJobDriver2 extends FlatSpec with Matchers with BeforeAndAfterAll {
+
+ it should "run the batch job driver on model implementing BatchModelTemplate" in {
+
+ val jobConfig = JobConfig(
+ Fetcher("local", None, Option(Array(Query(None, None, None, None, None, None, None, None, None, Option("src/test/resources/sample_telemetry.log"))))),
+ Option(Array[Filter](Filter("eventId", "EQ", Option("OE_START")))),
+ None,
+ "org.ekstep.analytics.framework.TestModel2",
+ Option(Map()),
+ Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))),
+ Option(8),
+ None,
+ None)
+
+ implicit val sc = Option(CommonUtil.getSparkContext(1, "Test"));
+ implicit val fc:Option[FrameworkContext] = None;
+ JobDriver.run("batch", JSONUtils.serialize(jobConfig), TestModel4);
+ CommonUtil.closeSparkContext()(sc.get);
+ }
+}
\ No newline at end of file
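
Note: TestModel4 only supplies the three template stages; the chaining itself lives in IBatchModelTemplate and is not part of this diff. Roughly, the template's execute is expected to behave like the hedged sketch below (the real implementation may add logging, caching and metric counting):

    // Hedged sketch of the contract TestModel4 relies on, not the actual framework code.
    def execute(events: RDD[Event], jobParams: Option[Map[String, AnyRef]])
               (implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = {
      val config = jobParams.getOrElse(Map[String, AnyRef]())
      postProcess(algorithm(preProcess(events, config), config), config)
    }
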
diff --git a/auto_build_deploy b/auto_build_deploy
new file mode 100644
index 00000000..d9415a7d
--- /dev/null
+++ b/auto_build_deploy
@@ -0,0 +1,53 @@
+@Library('deploy-conf') _
+node('build-slave') {
+ try {
+ String ANSI_GREEN = "\u001B[32m"
+ String ANSI_NORMAL = "\u001B[0m"
+ String ANSI_BOLD = "\u001B[1m"
+ String ANSI_RED = "\u001B[31m"
+ String ANSI_YELLOW = "\u001B[33m"
+ ansiColor('xterm') {
+ stage('Checkout') {
+ tag_name = env.JOB_NAME.split("/")[-1]
+ if (!tag_name.contains(env.public_repo_branch)) {
+ println("Error.. Tag does not contain " + env.public_repo_branch)
+ error("Oh ho! Tag is not a release candidate.. Skipping build")
+ }
+ cleanWs()
+ def scmVars = checkout scm
+ checkout scm: [$class: 'GitSCM', branches: [[name: "refs/tags/$tag_name"]], userRemoteConfigs: [[url: scmVars.GIT_URL]]]
+ commit_hash = sh(script: 'git rev-parse --short HEAD', returnStdout: true).trim()
+ artifact_version = tag_name + "_" + commit_hash
+ echo "artifact_version: "+ artifact_version
+ }
+ }
+
+ stage('Build') {
+ sh '''
+ mvn clean install -DskipTests
+ '''
+ }
+ stage('Archive artifacts'){
+ sh """
+ mkdir lpa_core_artifacts
+ cp analytics-job-driver/target/analytics-framework-2.0.jar lpa_core_artifacts
+ cp analytics-core/lib/scruid*.jar lpa_core_artifacts
+ zip -j lpa_core_artifacts.zip:${artifact_version} lpa_core_artifacts/*
+ """
+ archiveArtifacts artifacts: "lpa_core_artifacts.zip:${artifact_version}", fingerprint: true, onlyIfSuccessful: true
+ sh """echo {\\"artifact_name\\" : \\"lpa_core_artifacts.zip\\", \\"artifact_version\\" : \\"${artifact_version}\\", \\"node_name\\" : \\"${env.NODE_NAME}\\"} > metadata.json"""
+ archiveArtifacts artifacts: 'metadata.json', onlyIfSuccessful: true
+ currentBuild.description = artifact_version
+ }
+ currentBuild.result = "SUCCESS"
+ slack_notify(currentBuild.result, tag_name)
+ email_notify()
+ auto_build_deploy()
+ }
+ catch (err) {
+ currentBuild.result = "FAILURE"
+ slack_notify(currentBuild.result, tag_name)
+ email_notify()
+ throw err
+ }
+}
diff --git a/pom.xml b/pom.xml
index df744762..2b633ea0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,8 +14,8 @@
1.1.1
2.11
2.11.11
- 2.0
- 2.0.1
+ 2.4
+ 2.4.4
@@ -30,6 +30,15 @@
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>8</source>
+                    <target>8</target>
+                </configuration>
+            </plugin>
                 <artifactId>maven-assembly-plugin</artifactId>
                 <version>2.3</version>