diff --git a/.circleci/config.yml b/.circleci/config.yml index d7f638fa..608b3270 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,14 +1,19 @@ version: 2.1 jobs: analytics-core-build: - machine: true + machine: + image: ubuntu-2004:202008-01 steps: - checkout - restore_cache: key: dp-dependency-cache-{{ checksum "pom.xml" }} - run: name: sunbird-analytics-core-build - command: mvn scoverage:report + command: | + export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64 + export PATH=$JAVA_HOME/bin:$PATH + echo $(java -version) + mvn scoverage:report - save_cache: key: dp-dependency-cache-{{ checksum "pom.xml" }} paths: ~/.m2 @@ -20,10 +25,11 @@ jobs: - run: name: sonar command: | + export JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk-amd64 mvn -X sonar:sonar -Dsonar.projectKey=project-sunbird_sunbird-analytics-core -Dsonar.organization=project-sunbird -Dsonar.exclusions=analytics-core/src/main/scala/org/ekstep/analytics/streaming/** -Dsonar.host.url=https://sonarcloud.io -Dsonar.scala.coverage.reportPaths=/home/circleci/project/target/scoverage.xml workflows: version: 2.1 workflow: jobs: - - analytics-core-build \ No newline at end of file + - analytics-core-build diff --git a/.gitignore b/.gitignore index 2673f6a3..e0c5ae3a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ output-* dependency-reduced-pom.xml **/target **/logs +**/.idea/** +*.iml diff --git a/Jenkinsfile b/Jenkinsfile index 4b87ace2..8a963927 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -32,8 +32,9 @@ node('build-slave') { } stage('Archive artifacts'){ sh """ - mkdir lpa_artifacts + mkdir lpa_core_artifacts cp analytics-job-driver/target/analytics-framework-2.0.jar lpa_core_artifacts + cp analytics-core/lib/scruid*.jar lpa_core_artifacts zip -j lpa_core_artifacts.zip:${artifact_version} lpa_core_artifacts/* """ archiveArtifacts artifacts: "lpa_core_artifacts.zip:${artifact_version}", fingerprint: true, onlyIfSuccessful: true diff --git a/analytics-core/lib/scruid_2.11-2.4.0.jar b/analytics-core/lib/scruid_2.11-2.4.0.jar new file mode 100644 index 00000000..61e36184 Binary files /dev/null and b/analytics-core/lib/scruid_2.11-2.4.0.jar differ diff --git a/analytics-core/pom.xml b/analytics-core/pom.xml index 4d786df9..a7137dbe 100644 --- a/analytics-core/pom.xml +++ b/analytics-core/pom.xml @@ -103,8 +103,7 @@ org.apache.httpcomponents httpclient - 4.5.2 - provided + 4.5.6 com.typesafe @@ -248,12 +247,75 @@ commons-text 1.6 + + + com.typesafe.scala-logging + scala-logging_2.11 + 3.6.0 + ing.wbaa.druid scruid_${scala.maj.version} - 2.3.0 + 2.4.0 + system + ${project.basedir}/lib/scruid_2.11-2.4.0.jar + + + + + io.circe + circe-core_2.11 + 0.11.2 + + + + io.circe + circe-parser_2.11 + 0.11.2 + + + + io.circe + circe-generic_2.11 + 0.11.2 + + + + org.mdedetrich + akka-stream-json_2.11 + 0.4.0 + + + + org.mdedetrich + akka-http-json_2.11 + 0.4.0 + + + org.mdedetrich + akka-stream-circe_2.11 + 0.4.0 + + + + org.mdedetrich + akka-http-circe_2.11 + 0.4.0 + + + + com.typesafe.akka + akka-http_2.11 + 10.1.9 + + + io.circe + circe-java8_2.11 + 0.11.1 + + org.elasticsearch elasticsearch-hadoop @@ -272,6 +334,12 @@ postgresql 9.1-901.jdbc4 + + io.zonky.test + embedded-postgres + 1.2.6 + test + @@ -290,6 +358,8 @@ testCompile + 8 + 8 -dependencyfile ${project.build.directory}/.scala_dependencies diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala index 8a3270a8..a14ef530 100644 --- 
a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFetcher.scala @@ -7,11 +7,11 @@ import org.apache.spark.streaming.dstream.DStream import org.ekstep.analytics.framework.Level.INFO import org.ekstep.analytics.framework.exception.DataFetcherException import org.ekstep.analytics.framework.fetcher.{AzureDataFetcher, DruidDataFetcher, S3DataFetcher} -import org.ekstep.analytics.framework.util.{JSONUtils, JobLogger} +import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger} /** - * @author Santhosh - */ + * @author Santhosh + */ object DataFetcher { implicit val className = "org.ekstep.analytics.framework.DataFetcher" @@ -33,37 +33,59 @@ object DataFetcher { AzureDataFetcher.getObjectKeys(search.queries.get); case "local" => JobLogger.log("Fetching the batch data from Local file") - search.queries.get.map { x => x.file.getOrElse("") }.filterNot { x => x == null }; + search.queries.get.map { x => x.file.getOrElse(null) }.filterNot { x => x == null }; case "druid" => JobLogger.log("Fetching the batch data from Druid") val data = DruidDataFetcher.getDruidData(search.druidQuery.get) + // $COVERAGE-OFF$ + // Disabling scoverage as the below code cannot be covered as DruidDataFetcher is not mockable being an object and embedded druid is not available yet val druidDataList = data.map(f => JSONUtils.deserialize[T](f)) - return sc.parallelize(druidDataList); + return druidDataList + // $COVERAGE-ON$ case _ => throw new DataFetcherException("Unknown fetcher type found"); } + if (null == keys || keys.length == 0) { return sc.parallelize(Seq[T](), JobContext.parallelization); } JobLogger.log("Deserializing Input Data", None, INFO); + val filteredKeys = search.queries.get.map{q => + getFilteredKeys(q, keys, q.partitions) + }.flatMap(f => f) + val isString = mf.runtimeClass.getName.equals("java.lang.String"); - sc.textFile(keys.mkString(","), JobContext.parallelization).map { line => { + val inputEventsCount = fc.inputEventsCount; + sc.textFile(filteredKeys.mkString(","), JobContext.parallelization).map { line => { try { + inputEventsCount.add(1); if (isString) line.asInstanceOf[T] else JSONUtils.deserialize[T](line); } catch { case ex: Exception => JobLogger.log(ex.getMessage, None, INFO); null.asInstanceOf[T] - } } + } }.filter { x => x != null }; } /** - * API to fetch the streaming data given an array of query objects - */ + * API to fetch the streaming data given an array of query objects + */ def fetchStreamData[T](sc: StreamingContext, search: Fetcher)(implicit mf: Manifest[T]): DStream[T] = { null; } + def getFilteredKeys(query: Query, keys: Array[String], partitions: Option[List[Int]]): Array[String] = { + if (partitions.nonEmpty) { + val finalKeys = keys.map{f => + partitions.get.map{p => + val reg = raw"(\d{4})-(\d{2})-(\d{2})-$p-".r.findFirstIn(f) + if(reg.nonEmpty && f.contains(reg.get)) f else "" + } + }.flatMap(f => f) + finalKeys.filter(f => f.nonEmpty) + } + else keys + } } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala index 3b8dd775..bca8cb86 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/DataFilter.scala @@ -133,16 +133,6 @@ object DataFilter { val eventMap = CommonUtil.caseClassToMap(event) 
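Illustrative note (not part of the patch): a minimal sketch of how the new `partitions` option on `Query` works with the `DataFetcher.getFilteredKeys` helper added above. The bucket, prefix and object-key names are hypothetical assumptions for the example only.

import org.ekstep.analytics.framework.{DataFetcher, Query}

object PartitionFilterExample {
  def main(args: Array[String]): Unit = {
    // Hypothetical object keys following the yyyy-MM-dd-<partition>- naming convention
    val keys = Array(
      "wasb://telemetry@account.blob.core.windows.net/raw/2020-06-10-0-1591749000000.json.gz",
      "wasb://telemetry@account.blob.core.windows.net/raw/2020-06-10-1-1591749000000.json.gz"
    )
    // Only partition 0 is requested for this query; all other Query fields use their defaults
    val query = Query(bucket = Option("telemetry"), prefix = Option("raw/"), partitions = Option(List(0)))
    // getFilteredKeys keeps only keys whose name matches the (\d{4})-(\d{2})-(\d{2})-<partition>- pattern
    val filtered = DataFetcher.getFilteredKeys(query, keys, query.partitions)
    filtered.foreach(println) // expect only the partition-0 key to remain
  }
}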
CommonUtil.getTimestamp(eventMap.get("$attimestamp").get.asInstanceOf[String]).asInstanceOf[AnyRef]; } - case "gameId" => - val gid = getBeanProperty(event, "edata.eks.gid"); - if (null == gid) - getBeanProperty(event, "gdata.id"); - else - gid; - case "genieTag" => - val tags = if(event.isInstanceOf[Event]) CommonUtil.getETags(event.asInstanceOf[Event]).app else getBeanProperty(event, "etags").asInstanceOf[ETags].app; - if (tags.isDefined) tags.get else List() - case "gameVersion" => getBeanProperty(event, "gdata.ver"); case "userId" => getBeanProperty(event, "uid"); case "sessionId" => getBeanProperty(event, "sid"); case "telemetryVersion" => getBeanProperty(event, "ver"); diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala index b34e6ea9..88308508 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/FrameworkContext.scala @@ -1,17 +1,27 @@ package org.ekstep.analytics.framework -import ing.wbaa.druid.DruidConfig +import ing.wbaa.druid.{DruidConfig, QueryHost} import ing.wbaa.druid.client.DruidClient +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.types.StructType import org.sunbird.cloud.storage.BaseStorageService -import org.sunbird.cloud.storage.conf.AppConf -import org.sunbird.cloud.storage.factory.{StorageConfig, StorageServiceFactory} +import org.sunbird.cloud.storage.factory.StorageServiceFactory import scala.collection.mutable.Map +import org.ekstep.analytics.framework.util.HadoopFileUtil +import org.apache.spark.util.LongAccumulator +import org.ekstep.analytics.framework.conf.AppConf +import org.ekstep.analytics.framework.fetcher.{AkkaHttpClient, AkkaHttpUtil, DruidDataFetcher} class FrameworkContext { var dc: DruidClient = null; + var drc: DruidClient = null; var storageContainers: Map[String, BaseStorageService] = Map(); + val fileUtil = new HadoopFileUtil(); + + var inputEventsCount: LongAccumulator = _ + var outputEventsCount: LongAccumulator = _ def initialize(storageServices: Option[Array[(String, String, String)]]) { dc = DruidConfig.DefaultConfig.client; @@ -26,15 +36,23 @@ class FrameworkContext { getStorageService(storageType, storageType, storageType); } + def getHadoopFileUtil(): HadoopFileUtil = { + return fileUtil; + } + def getStorageService(storageType: String, storageKey: String, storageSecret: String): BaseStorageService = { + if("local".equals(storageType)) { + return null; + } if (!storageContainers.contains(storageType + "|" + storageKey)) { - storageContainers.put(storageType, StorageServiceFactory.getStorageService(StorageConfig(storageType, AppConf.getStorageKey(storageKey), AppConf.getStorageSecret(storageSecret)))); + storageContainers.put(storageType + "|" + storageKey, StorageServiceFactory.getStorageService(org.sunbird.cloud.storage.factory.StorageConfig(storageType, AppConf.getConfig(storageKey), AppConf.getConfig(storageSecret)))); } - storageContainers.get(storageType).get + storageContainers.get(storageType + "|" + storageKey).get } - def setDruidClient(druidClient: DruidClient) { + def setDruidClient(druidClient: DruidClient, druidRollupClient: DruidClient) { dc = druidClient; + drc = druidRollupClient; } def getDruidClient(): DruidClient = { @@ -44,19 +62,49 @@ class FrameworkContext { return dc; } + def getDruidRollUpClient(): DruidClient = { + if (null == drc) { + val 
conf = DruidConfig.DefaultConfig + drc = DruidConfig.apply( + Seq(QueryHost(AppConf.getConfig("druid.rollup.host"), AppConf.getConfig("druid.rollup.port").toInt)), + conf.secure, + conf.url,conf.healthEndpoint,conf.datasource,conf.responseParsingTimeout,conf.clientBackend, + conf.clientConfig,conf.scanQueryLegacyMode,conf.zoneId,conf.system).client + } + return drc; + } + + def getAkkaHttpUtil(): AkkaHttpClient = { + AkkaHttpUtil + } + def shutdownDruidClient() = { if (dc != null) dc.actorSystem.terminate() } + def shutdownDruidRollUpClien() = { + if (drc != null) drc.actorSystem.terminate() + } + def shutdownStorageService() = { - if (null != storageContainers) { + if (storageContainers.nonEmpty) { storageContainers.foreach(f => f._2.closeContext()); } } def closeContext() = { shutdownDruidClient(); + shutdownDruidRollUpClien(); shutdownStorageService(); } + def loadData(spark: SparkSession, settings: scala.collection.Map[String, String], url: String, schema: StructType): DataFrame = { + if (schema.nonEmpty) { + spark.read.schema(schema).format(url).options(settings).load() + } + else { + spark.read.format(url).options(settings).load() + } + } + } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala index bf78b54b..acb6011a 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/Models.scala @@ -63,7 +63,7 @@ class ProfileEvent(val eid: String, val ts: String, val `@timestamp`: String, va case class UserProfile(uid: String, gender: String, age: Int) // Analytics Framework Job Models -case class Query(bucket: Option[String] = None, prefix: Option[String] = None, startDate: Option[String] = None, endDate: Option[String] = None, delta: Option[Int] = None, brokerList: Option[String] = None, topic: Option[String] = None, windowType: Option[String] = None, windowDuration: Option[Int] = None, file: Option[String] = None, excludePrefix: Option[String] = None, datePattern: Option[String] = None, folder: Option[String] = None, creationDate: Option[String] = None) +case class Query(bucket: Option[String] = None, prefix: Option[String] = None, startDate: Option[String] = None, endDate: Option[String] = None, delta: Option[Int] = None, brokerList: Option[String] = None, topic: Option[String] = None, windowType: Option[String] = None, windowDuration: Option[Int] = None, file: Option[String] = None, excludePrefix: Option[String] = None, datePattern: Option[String] = None, folder: Option[String] = None, creationDate: Option[String] = None, partitions: Option[List[Int]] = None) @scala.beans.BeanInfo case class Filter(name: String, operator: String, value: Option[AnyRef] = None) @scala.beans.BeanInfo @@ -77,11 +77,20 @@ case class JobConfig(search: Fetcher, filters: Option[Array[Filter]], sort: Opti //Druid Query Models @scala.beans.BeanInfo -case class DruidQueryModel(queryType: String, dataSource: String, intervals: String, granularity: Option[String] = Option("all"), aggregations: Option[List[Aggregation]] = Option(List(Aggregation(Option("count"), "count", "count"))), dimensions: Option[List[DruidDimension]] = None, filters: Option[List[DruidFilter]] = None, having: Option[DruidHavingFilter] = None, postAggregation: Option[List[PostAggregation]] = None, threshold: Option[Long] = None, metric: Option[String] = None, descending: Option[String] = Option("false")) +case 
class DruidQueryModel(queryType: String, dataSource: String, intervals: String, granularity: Option[String] = Option("all"), aggregations: Option[List[Aggregation]] = Option(List(Aggregation(Option("count"), "count", "count"))), dimensions: Option[List[DruidDimension]] = None, filters: Option[List[DruidFilter]] = None, having: Option[DruidHavingFilter] = None, postAggregation: Option[List[PostAggregation]] = None, columns: Option[List[String]] = None,sqlDimensions: Option[List[DruidSQLDimension]] = None, threshold: Option[Long] = None, metric: Option[String] = None, descending: Option[String] = Option("false"), intervalSlider: Int = 0) + +@scala.beans.BeanInfo +case class DruidSQLQuery(query: String, resultFormat : String = "objectLines", header:Boolean =true ) + @scala.beans.BeanInfo -case class DruidDimension(fieldName: String, aliasName: Option[String]) +case class DruidSQLDimension(fieldName: String, function: Option[String]) + @scala.beans.BeanInfo -case class Aggregation(name: Option[String], `type`: String, fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None) +case class DruidDimension(fieldName: String, aliasName: Option[String], `type`: Option[String] = Option("Default"), outputType: Option[String] = None, extractionFn: Option[List[ExtractFn]] = None) +@scala.beans.BeanInfo +case class ExtractFn(`type`: String, fn: String, retainMissingValue: Option[Boolean] = Option(false), replaceMissingValueWith: Option[String] = None) +@scala.beans.BeanInfo +case class Aggregation(name: Option[String], `type`: String, fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None, lgK: Option[Int] = Option(12), tgtHllType: Option[String] = Option("HLL_4"), round: Option[Boolean] = None, filterAggType: Option[String] = None, filterFieldName: Option[String] = None, filterValue: Option[AnyRef] = None) @scala.beans.BeanInfo case class PostAggregation(`type`: String, name: String, fields: PostAggregationFields, fn: String, ordering: Option[String] = None) // only right field can have type as FieldAccess or Constant. 
Only if it Constant, need to specify "rightFieldType" @@ -242,4 +251,21 @@ case class DeviceProfileOutput(device_id: String, first_access: Option[Timestamp state_custom: Option[String], state_code_custom: Option[String], district_custom: Option[String], fcm_token: Option[String], producer_id: Option[String], user_declared_state: Option[String], user_declared_district: Option[String], api_last_updated_on: Option[Timestamp], user_declared_on: Option[Timestamp], - updated_date: Option[Timestamp] = Option(new Timestamp(System.currentTimeMillis()))) extends AlgoOutput \ No newline at end of file + updated_date: Option[Timestamp] = Option(new Timestamp(System.currentTimeMillis()))) extends AlgoOutput + + +case class StorageConfig(store: String, container: String, fileName: String, accountKey: Option[String] = None, secretKey: Option[String] = None); + +case class OnDemandJobRequest(request_id: String, request_data : String,download_urls :List[String], status: String) + +@scala.beans.BeanInfo +case class DruidOutput(t: Map[String, Any]) extends Map[String,Any] with Input with AlgoInput with AlgoOutput with Output { + private val internalMap = t + override def +[B1 >: Any](kv: (String, B1)): Map[String, B1] = new DruidOutput(internalMap + kv) + + override def get(key: String): Option[Any] =internalMap.get(key) + + override def iterator: Iterator[(String, Any)] = internalMap.iterator + + override def -(key: String): Map[String, Any] = new DruidOutput(internalMap - key) +} diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala index 2d6a5305..f68a0bd6 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/OutputDispatcher.scala @@ -20,11 +20,6 @@ object OutputDispatcher { implicit val className = "org.ekstep.analytics.framework.OutputDispatcher"; - @throws(classOf[DispatcherException]) - private def _dispatch(dispatcher: Dispatcher, events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, events); - } - @throws(classOf[DispatcherException]) def dispatch[T](outputs: Option[Array[Dispatcher]], events: RDD[T])(implicit sc: SparkContext, fc: FrameworkContext): Long = { @@ -36,8 +31,7 @@ object OutputDispatcher { JobLogger.log("Dispatching output", Option(dispatcher.to)); DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, eventArr); } - events.count; - + 0 } @throws(classOf[DispatcherException]) @@ -50,20 +44,16 @@ object OutputDispatcher { DispatcherFactory.getDispatcher(dispatcher).dispatch(dispatcher.params, eventArr); events.count; } - + @throws(classOf[DispatcherException]) - def dispatch[T](dispatcher: Dispatcher, events: Array[String])(implicit fc: FrameworkContext) = { + def dispatch[T](config: StorageConfig, events: RDD[T])(implicit sc: SparkContext, fc: FrameworkContext): Long = { - if (null == dispatcher) { - throw new DispatcherException("No output configurations found"); - } - if (events.length != 0) { - JobLogger.log("Dispatching output", Option(dispatcher.to)); - DispatcherFactory.getDispatcher(dispatcher).dispatch(events, dispatcher.params); - } else { - JobLogger.log("No events produced"); - null; + if (null == config) { + throw new DispatcherException("No configuration found"); } + val eventArr = stringify(events); + 
DispatcherFactory.getDispatcher(config).dispatch(eventArr, config); + events.count; } def stringify[T](events: RDD[T]): RDD[String] = { diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala new file mode 100644 index 00000000..9feede13 --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModel.scala @@ -0,0 +1,18 @@ +package org.ekstep.analytics.framework + +import org.apache.spark.sql.{Dataset, SparkSession} + + + +trait ReportOnDemandModel[T] { + + def execute(reportParams: Option[Map[String, AnyRef]])(implicit spark: SparkSession, fc: FrameworkContext) : Unit + + def getJobRequest(jobId: String) (implicit sparkSession: SparkSession, fc: FrameworkContext): Dataset[T] + + def updateJobRequest(reportBlobs : Dataset[T]) (implicit sparkSession: SparkSession, fc: FrameworkContext): Unit + + def name() : String = "OnDemandReportModel"; + + +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala new file mode 100644 index 00000000..b5d59191 --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/ReportOnDemandModelTemplate.scala @@ -0,0 +1,113 @@ +package org.ekstep.analytics.framework + +import java.sql.DriverManager +import java.util.{Date, Properties} + +import org.apache.spark.sql._ +import org.ekstep.analytics.framework.conf.AppConf +import org.ekstep.analytics.framework.util.CommonUtil + + +trait ReportOnDemandModelTemplate[A <: AnyRef, B <: AnyRef] extends ReportOnDemandModel[OnDemandJobRequest] { + + val connProperties: Properties = CommonUtil.getPostgresConnectionProps() + val db: String = AppConf.getConfig("postgres.db") + val url: String = AppConf.getConfig("postgres.url") + s"$db" + val report_config_table: String = AppConf.getConfig("postgres.table.job_request") + + /** + * Override and implement the data product execute method, + * 1. filterReports + * 2. generateReports + * 3. 
saveReports + */ + override def execute(reportParams: Option[Map[String, AnyRef]])(implicit spark: SparkSession, fc: FrameworkContext) = { + + val config = reportParams.getOrElse(Map[String, AnyRef]()) + + val reportConfigList = getJobRequest(config.getOrElse("jobId", "").asInstanceOf[String]) + + val filteredReports = filterReports(reportConfigList, config) + + val generatedReports = generateReports(filteredReports, config) + + val savedReportsList = saveReports(generatedReports, config) + + updateJobRequest(savedReportsList) + + } + + /** + * Method will get the list of active on demand reports from table + * + * @param jobId job id of the report + * @param spark sparkSession implicit + * @param fc framework context + * @return + */ + override def getJobRequest(jobId: String)(implicit spark: SparkSession, fc: FrameworkContext): Dataset[OnDemandJobRequest] = { + + val encoder = Encoders.product[OnDemandJobRequest] + import org.apache.spark.sql.functions.col + val reportConfigsDf = spark.sqlContext.sparkSession.read.jdbc(url, report_config_table, connProperties) + .where(col("job_id") === jobId).where(col("status") === "SUBMITTED") + .select("request_id", "request_data","download_urls","status") + reportConfigsDf.as[OnDemandJobRequest](encoder) + } + + + /** + * Method will save the list of report location urls for each request id + * + * @param reportLocationsDf Dataset with list of report paths per request id + * @param spark sparkSession implict + * @param fc frameWorkContext implicit + * @return + */ + override def updateJobRequest(reportLocationsDf: Dataset[OnDemandJobRequest])(implicit spark: SparkSession, fc: FrameworkContext) = { + val connProperties: Properties = CommonUtil.getPostgresConnectionProps() + val db: String = AppConf.getConfig("postgres.db") + val url: String = AppConf.getConfig("postgres.url") + s"$db" + val report_config_table: String = AppConf.getConfig("postgres.table.job_request") + val user = connProperties.getProperty("user") + val pass = connProperties.getProperty("password") + reportLocationsDf.rdd.foreachPartition { rddPartition: Iterator[OnDemandJobRequest] => + val connection = DriverManager.getConnection(url, user, pass) + val statement = connection.createStatement() + rddPartition.foreach { report => + val url = report.download_urls.mkString(",") + val row = + s""" UPDATE ${report_config_table} SET download_urls = '{${url}}', + |dt_job_completed = '${new Date()}',status = 'COMPLETED' where request_id='${report.request_id}' """.stripMargin + statement.addBatch(row) + } + statement.executeBatch() + statement.close() + connection.close() + } + + } + + /** + * filter Reports steps before generating Report. Few pre-process steps are + * 1. Combine or filter the report configs an + * 2. Join or fetch Data from Tables + */ + def filterReports(reportConfigs: Dataset[OnDemandJobRequest], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[A] + + /** + * Method which will generate report + * Input : List of Filtered Ids to generate Report + * Output : List of Files to be saved per request + */ + def generateReports(reports: Dataset[A], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[B] + + /** + * . + * 1. Saving Reports to Blob + * 2. Generate Metrics + * 3. 
Return Map list of blobs to RequestIds as per the request + */ + def saveReports(reports: Dataset[B], config: Map[String, AnyRef])(implicit spark: SparkSession, fc: FrameworkContext): Dataset[OnDemandJobRequest] + +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala index b1ba1532..9aca1289 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/AzureDispatcher.scala @@ -5,74 +5,50 @@ import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.ekstep.analytics.framework.exception.DispatcherException -import org.ekstep.analytics.framework.util.{CommonUtil, JobLogger} +import org.ekstep.analytics.framework.util.{ CommonUtil, JobLogger } import org.sunbird.cloud.storage.conf.AppConf -import org.sunbird.cloud.storage.factory.{StorageConfig, StorageServiceFactory} +import org.sunbird.cloud.storage.factory.{ StorageServiceFactory } import org.ekstep.analytics.framework.Level import scala.concurrent.Await import org.ekstep.analytics.framework.FrameworkContext +import org.apache.hadoop.fs.FileUtil +import org.apache.hadoop.fs.FileSystem +import java.net.URI +import org.apache.hadoop.fs.Path +import org.ekstep.analytics.framework.util.JSONUtils +import org.ekstep.analytics.framework.StorageConfig -object AzureDispatcher extends IDispatcher { +object AzureDispatcher extends HadoopDispatcher with IDispatcher { - implicit val className = "org.ekstep.analytics.framework.dispatcher.AzureDispatcher" + implicit val className = "org.ekstep.analytics.framework.dispatcher.AzureDispatcher" - @throws(classOf[DispatcherException]) - def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { - var filePath = config.getOrElse("filePath", null).asInstanceOf[String]; - val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; - val key = config.getOrElse("key", null).asInstanceOf[String]; - val zip = config.getOrElse("zip", false).asInstanceOf[Boolean]; - val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]; + override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - if (null == bucket || null == key) { - throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure") - } - var deleteFile = false; - if (null == filePath) { - filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log"; - val fw = new FileWriter(filePath, true); - events.foreach { x => { fw.write(x + "\n"); } }; - fw.close(); - deleteFile = true; - } - val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath; - val storageService = fc.getStorageService("azure"); - storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None); - storageService.closeContext(); - if (deleteFile) CommonUtil.deleteFile(filePath); - if (zip) CommonUtil.deleteFile(finalPath); - events; - } - - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - -// dispatch(events.collect(), config); - val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; - val key = 
config.getOrElse("key", null).asInstanceOf[String]; - val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]; + val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; + val key = config.getOrElse("key", null).asInstanceOf[String]; - if (null == bucket || null == key) { - throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure") - } - events.saveAsTextFile("wasb://" + bucket + "@" + AppConf.getStorageKey(AppConf.getStorageType()) + ".blob.core.windows.net/" + key); + if (null == bucket || null == key) { + throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure") } - def dispatchDirectory(config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext) = { - val dirPath = config.getOrElse("dirPath", null).asInstanceOf[String] - val bucket = config.getOrElse("bucket", null).asInstanceOf[String] - val key = config.getOrElse("key", null).asInstanceOf[String] - val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean] + val srcFile = CommonUtil.getAzureFile(bucket, "_tmp/" + key); + val destFile = CommonUtil.getAzureFile(bucket, key); - if (null == bucket || null == key || dirPath == null) { - throw new DispatcherException("'local file path', 'bucket' & 'key' parameters are required to upload directory to azure") - } + dispatchData(srcFile, destFile, sc.hadoopConfiguration, events) + } - val storageService = fc.getStorageService("azure"); - val uploadMsg = storageService.upload(bucket, dirPath, key, Option(true), Option(1), Option(3), None) - storageService.closeContext(); - JobLogger.log("Successfully Uploaded files", Option(Map("filesUploaded" -> "")), Level.INFO) - CommonUtil.deleteDirectory(dirPath) + override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext) = { + val bucket = config.container; + val key = config.fileName; + + if (null == bucket || null == key || bucket.isEmpty() || key.isEmpty()) { + throw new DispatcherException("'bucket' & 'key' parameters are required to send output to azure") } + val srcFile = CommonUtil.getAzureFile(bucket, "_tmp/" + key, config.accountKey.getOrElse("azure_storage_key")); + val destFile = CommonUtil.getAzureFile(bucket, key, config.accountKey.getOrElse("azure_storage_key")); + + dispatchData(srcFile, destFile, sc.hadoopConfiguration, events) + } } diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala index c547a502..22e745da 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ConsoleDispatcher.scala @@ -19,7 +19,7 @@ object ConsoleDispatcher extends IDispatcher { events; } - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { + def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = { if (config.getOrElse("printEvent", true).asInstanceOf[Boolean]) { for (event <- events) { println("Event", event); diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala index 65dad1de..3c86b680 100644 --- 
a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/ESDispatcher.scala @@ -20,7 +20,4 @@ object ESDispatcher extends IDispatcher { events.saveToEs(s"$index/_doc", Map("es.input.json" -> "true")) } - override def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { - events - } } diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala index 1a3aeed2..f45cefb7 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/FileDispatcher.scala @@ -10,30 +10,36 @@ import java.nio.file.Files import java.nio.file.Paths import org.apache.spark.SparkContext import org.ekstep.analytics.framework.FrameworkContext +import java.io.File +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.FileUtil +import org.apache.commons.io.FileUtils +import org.ekstep.analytics.framework.util.CommonUtil +import org.ekstep.analytics.framework.StorageConfig /** * @author Santhosh */ -object FileDispatcher extends IDispatcher { - - implicit val className = "org.ekstep.analytics.framework.dispatcher.FileDispatcher"; - - @throws(classOf[DispatcherException]) - def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { - val filePath = config.getOrElse("file", null).asInstanceOf[String]; - if (null == filePath) { - throw new DispatcherException("'file' parameter is required to send output to file"); - } - val dir = filePath.substring(0, filePath.lastIndexOf("/")); - Files.createDirectories(Paths.get(dir)); - val fw = new FileWriter(filePath, true); - events.foreach { x => { fw.write(x + "\n"); } }; - fw.close(); - events; - } - - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - dispatch(events.collect(), config); +object FileDispatcher extends HadoopDispatcher with IDispatcher { + + implicit val className = "org.ekstep.analytics.framework.dispatcher.FileDispatcher"; + + override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = { + val filePath = config.getOrElse("file", null).asInstanceOf[String]; + if (null == filePath) { + throw new DispatcherException("'file' parameter is required to send output to file"); } + val path = new File(filePath); + val index = path.getPath.lastIndexOf(path.getName); + val prefix = path.getPath.substring(0, index) + + dispatchData(prefix + "_tmp/" + path.getName, filePath, sc.hadoopConfiguration, events) + } + + override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext): Unit = { + val file = config.fileName; + dispatch(Map[String, AnyRef]("file" -> file), events); + } + } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala new file mode 100644 index 00000000..bb0bbfdd --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/HadoopDispatcher.scala @@ -0,0 +1,28 @@ +package 
org.ekstep.analytics.framework.dispatcher + +import org.ekstep.analytics.framework.exception.DispatcherException +import java.io.FileWriter +import org.ekstep.analytics.framework.OutputDispatcher +import org.apache.spark.rdd.RDD +import org.ekstep.analytics.framework.util.JobLogger +import org.apache.commons.lang3.StringUtils +import java.nio.file.Files +import java.nio.file.Paths +import org.apache.spark.SparkContext +import org.ekstep.analytics.framework.FrameworkContext +import org.apache.hadoop.conf.Configuration + +/** + * @author Santhosh + */ +trait HadoopDispatcher { + + def dispatchData(srcFile: String, destFile: String, conf: Configuration, events: RDD[String])(implicit fc: FrameworkContext) = { + + val fileUtil = fc.getHadoopFileUtil(); + fileUtil.delete(conf, srcFile, destFile); + events.saveAsTextFile(srcFile); + fileUtil.copyMerge(srcFile, destFile, conf, true); + } + +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala index b041dff4..e68ac504 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/IDispatcher.scala @@ -4,6 +4,7 @@ import org.ekstep.analytics.framework.exception.DispatcherException import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.ekstep.analytics.framework.FrameworkContext +import org.ekstep.analytics.framework.StorageConfig /** * @author Santhosh @@ -11,9 +12,11 @@ import org.ekstep.analytics.framework.FrameworkContext trait IDispatcher { @throws(classOf[DispatcherException]) - def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext) : Array[String]; + def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) : Unit; @throws(classOf[DispatcherException]) - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext); + def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext) : Unit = { + throw new DispatcherException("Not supported method"); + } } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala index a31544de..094a99db 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/KafkaDispatcher.scala @@ -28,19 +28,23 @@ object KafkaDispatcher extends IDispatcher { def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { val brokerList = config.getOrElse("brokerList", null).asInstanceOf[String]; val topic = config.getOrElse("topic", null).asInstanceOf[String]; + val batchSize = config.getOrElse("batchSize", 100).asInstanceOf[Integer]; + val lingerMs = config.getOrElse("lingerMs", 10).asInstanceOf[Integer]; if (null == brokerList) { throw new DispatcherException("brokerList parameter is required to send output to kafka") } if (null == topic) { throw new DispatcherException("topic parameter is required to send output to kafka") } - KafkaEventProducer.sendEvents(events, topic, brokerList) + 
KafkaEventProducer.sendEvents(events, topic, brokerList, batchSize, lingerMs) events } def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { val brokerList = config.getOrElse("brokerList", null).asInstanceOf[String] val topic = config.getOrElse("topic", null).asInstanceOf[String] + val batchSize = config.getOrElse("batchSize", 100).asInstanceOf[Integer]; + val lingerMs = config.getOrElse("lingerMs", 10).asInstanceOf[Integer]; if (null == brokerList) { throw new DispatcherException("brokerList parameter is required to send output to kafka") } @@ -49,7 +53,7 @@ object KafkaDispatcher extends IDispatcher { } events.foreachPartition((partitions: Iterator[String]) => { - val kafkaSink = KafkaSink(_getKafkaProducerConfig(brokerList)); + val kafkaSink = KafkaSink(_getKafkaProducerConfig(brokerList, batchSize, lingerMs)); partitions.foreach { message => try { kafkaSink.send(topic, message, new Callback { @@ -76,12 +80,15 @@ object KafkaDispatcher extends IDispatcher { } - private def _getKafkaProducerConfig(brokerList: String): HashMap[String, Object] = { + private def _getKafkaProducerConfig(brokerList: String, batchSize: Integer, lingerMs: Integer): HashMap[String, Object] = { val props = new HashMap[String, Object]() - props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 3000L.asInstanceOf[Long]) + props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize); + props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 60000.asInstanceOf[Integer]); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy") + props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs) props } diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala index 2d8e1e9c..dfb75234 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3Dispatcher.scala @@ -8,57 +8,37 @@ import org.ekstep.analytics.framework.Level._ import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext import org.sunbird.cloud.storage.factory.StorageServiceFactory -import org.sunbird.cloud.storage.factory.StorageConfig import org.sunbird.cloud.storage.conf.AppConf import org.ekstep.analytics.framework.FrameworkContext +import org.ekstep.analytics.framework.StorageConfig /** * @author Santhosh */ -object S3Dispatcher extends IDispatcher { +object S3Dispatcher extends HadoopDispatcher with IDispatcher { - implicit val className = "org.ekstep.analytics.framework.dispatcher.S3Dispatcher" + implicit val className = "org.ekstep.analytics.framework.dispatcher.S3Dispatcher" - @throws(classOf[DispatcherException]) - def dispatch(events: Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { - var filePath = config.getOrElse("filePath", null).asInstanceOf[String]; - val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; - val key = config.getOrElse("key", null).asInstanceOf[String]; - val zip = config.getOrElse("zip", false).asInstanceOf[Boolean]; - val isPublic = config.getOrElse("public", 
false).asInstanceOf[Boolean]; + override def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext): Unit = { - if (null == bucket || null == key) { - throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3") - } - var deleteFile = false; - if (null == filePath) { - filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log"; - val fw = new FileWriter(filePath, true); - events.foreach { x => { fw.write(x + "\n"); } }; - fw.close(); - deleteFile = true; - } - val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath; - Console.println(bucket, finalPath, key, Option(isPublic)) - Console.println("FC class type", fc.getClass.getTypeName); - val storageService = fc.getStorageService("aws"); - storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None); - storageService.closeContext(); - if (deleteFile) CommonUtil.deleteFile(filePath); - if (zip) CommonUtil.deleteFile(finalPath); - events; - } - - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - - val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; - val key = config.getOrElse("key", null).asInstanceOf[String]; - val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]; + val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; + val key = config.getOrElse("key", null).asInstanceOf[String]; + val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]; - if (null == bucket || null == key) { - throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3") - } - events.saveAsTextFile("s3n://" + bucket + "/" + key); + if (null == bucket || null == key) { + throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3") } + val srcFile = CommonUtil.getS3File(bucket, "_tmp/" + key); + val destFile = CommonUtil.getS3File(bucket, key); + dispatchData(srcFile, destFile, sc.hadoopConfiguration, events) + } + + override def dispatch(events: RDD[String], config: StorageConfig)(implicit sc: SparkContext, fc: FrameworkContext): Unit = { + val bucket = config.container; + val key = config.fileName; + + dispatch(Map[String, AnyRef]("bucket" -> bucket, "key" -> key), events); + } + } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala deleted file mode 100644 index 87da2c40..00000000 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/S3FileDispatcher.scala +++ /dev/null @@ -1,53 +0,0 @@ -package org.ekstep.analytics.framework.dispatcher - -import java.io.FileWriter - -import org.apache.spark.SparkContext -import org.apache.spark.rdd.RDD -import org.ekstep.analytics.framework.exception.DispatcherException -import org.ekstep.analytics.framework.util.CommonUtil -import org.sunbird.cloud.storage.factory.StorageServiceFactory -import org.sunbird.cloud.storage.factory.StorageConfig -import org.sunbird.cloud.storage.conf.AppConf -import org.ekstep.analytics.framework.FrameworkContext - -/** - * @author Santhosh - */ -object S3FileDispatcher extends IDispatcher { - - implicit val className = "org.ekstep.analytics.framework.dispatcher.S3FileDispatcher" - - @throws(classOf[DispatcherException]) - def dispatch(events: 
Array[String], config: Map[String, AnyRef])(implicit fc: FrameworkContext): Array[String] = { - var filePath = config.getOrElse("filePath", null).asInstanceOf[String]; - val bucket = config.getOrElse("bucket", null).asInstanceOf[String]; - val key = config.getOrElse("key", null).asInstanceOf[String]; - val zip = config.getOrElse("zip", false).asInstanceOf[Boolean]; - val isPublic = config.getOrElse("public", false).asInstanceOf[Boolean]; - - if (null == bucket || null == key) { - throw new DispatcherException("'bucket' & 'key' parameters are required to send output to S3") - } - var deleteFile = false; - if (null == filePath) { - filePath = AppConf.getConfig("spark_output_temp_dir") + "output-" + System.currentTimeMillis() + ".log"; - val fw = new FileWriter(filePath, true); - events.foreach { x => { fw.write(x + "\n"); } }; - fw.close(); - deleteFile = true; - } - val finalPath = if (zip) CommonUtil.gzip(filePath) else filePath; - val storageService = fc.getStorageService("aws"); - storageService.upload(bucket, finalPath, key, Option(isPublic), None, None, None); - storageService.closeContext(); - if (deleteFile) CommonUtil.deleteFile(filePath); - if (zip) CommonUtil.deleteFile(finalPath); - events; - } - - def dispatch(config: Map[String, AnyRef], events: RDD[String])(implicit sc: SparkContext, fc: FrameworkContext) = { - dispatch(events.collect(), config); - } - -} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala index 738329e5..cc1961ec 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/dispatcher/SlackDispatcher.scala @@ -29,9 +29,7 @@ object SlackDispatcher extends IDispatcher { } val webhookUrl = AppConf.getConfig("monitor.notification.webhook_url") - val message = if (hasAttachments.equalsIgnoreCase("true")) { - SlackMessage(channel, userName, attachments = Some(events.map(JSONUtils.deserialize[Attachments](_)))) - } else SlackMessage(channel, userName, text = Some(events.mkString(","))) + val message = SlackMessage(channel, userName, text = Some(events.mkString(","))) val resp = RestUtil.post[String](webhookUrl, JSONUtils.serialize(message)) events } diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala index 1c912339..aba8e3a5 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/factory/DispatcherFactory.scala @@ -5,6 +5,7 @@ import org.ekstep.analytics.framework.dispatcher._ import org.ekstep.analytics.framework.exception.DispatcherException import org.ekstep.analytics.framework.util.JobLogger import org.ekstep.analytics.framework.Level._ +import org.ekstep.analytics.framework.StorageConfig /** * @author Santhosh @@ -14,8 +15,6 @@ object DispatcherFactory { @throws(classOf[DispatcherException]) def getDispatcher(disp: Dispatcher): IDispatcher = { disp.to.toLowerCase() match { - case "s3file" => - S3FileDispatcher; case "s3" => S3Dispatcher; case "kafka" => @@ -36,4 +35,18 @@ object DispatcherFactory { throw new DispatcherException("Unknown output dispatcher destination found"); } } + + 
@throws(classOf[DispatcherException]) + def getDispatcher(config: StorageConfig): IDispatcher = { + config.store.toLowerCase() match { + case "s3" => + S3Dispatcher; + case "local" => + FileDispatcher; + case "azure" => + AzureDispatcher; + case _ => + throw new DispatcherException("Unknown output dispatcher destination found"); + } + } } \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala index 2aa8cf3e..733225b0 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/AzureDataFetcher.scala @@ -28,7 +28,7 @@ object AzureDataFetcher { } private def getKeys(query: Query)(implicit fc: FrameworkContext) : Array[String] = { - val storageService = fc.getStorageService("azure"); + val storageService = fc.getStorageService("azure", "azure_storage_key", "azure_storage_secret"); val keys = storageService.searchObjects(getBucket(query.bucket), getPrefix(query.prefix), query.startDate, query.endDate, query.delta, query.datePattern.getOrElse("yyyy-MM-dd")) storageService.getPaths(getBucket(query.bucket), keys).toArray } diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala index 80aac70c..6828a927 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/fetcher/DruidDataFetcher.scala @@ -1,218 +1,409 @@ package org.ekstep.analytics.framework.fetcher +import java.sql.{Connection, DriverManager, ResultSet, Statement} import java.time.format.DateTimeFormatter +import java.util.Properties +import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.model._ +import akka.stream.ActorMaterializer +import akka.stream.scaladsl.{Flow, Framing, Keep, Sink, Source} +import akka.util.ByteString import ing.wbaa.druid._ -import ing.wbaa.druid.client.DruidHttpClient import ing.wbaa.druid.definitions._ import ing.wbaa.druid.dql.DSL._ import ing.wbaa.druid.dql.Dim import ing.wbaa.druid.dql.expressions.{AggregationExpression, FilteringExpression, PostAggregationExpression} import io.circe.Json -import org.ekstep.analytics.framework.conf.AppConf +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.ekstep.analytics.framework._ import org.ekstep.analytics.framework.exception.DataFetcherException -import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils} -import org.ekstep.analytics.framework.{DruidQueryModel, FrameworkContext, PostAggregationFields} +import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, ResultAccumulator} +import org.sunbird.cloud.storage.conf.AppConf -import scala.concurrent.Await +import scala.concurrent.{Await, ExecutionContextExecutor, Future} + + +trait AkkaHttpClient { + def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse] +} + +object AkkaHttpUtil extends AkkaHttpClient { + def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse] ={ + Http().singleRequest(httpRequest) + } +} object DruidDataFetcher { - - @throws(classOf[DataFetcherException]) - def getDruidData(query: 
DruidQueryModel)(implicit fc: FrameworkContext): List[String] = { - val request = getDruidQuery(query) - val result = executeDruidQuery(request); - processResult(query, result); - } + @throws(classOf[DataFetcherException]) + def getDruidData(query: DruidQueryModel, queryAsStream: Boolean = false)(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = { + val request = getDruidQuery(query) + fc.inputEventsCount = sc.longAccumulator("DruidDataCount") + if (queryAsStream) { + executeQueryAsStream(query, request) - def getDruidQuery(query: DruidQueryModel): DruidQuery = { - - query.queryType.toLowerCase() match { - case "groupby" => { - val DQLQuery = DQL - .from(query.dataSource) - .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all"))) - .interval(CommonUtil.getIntervalRange(query.intervals)) - .agg(getAggregation(query): _*) - .groupBy(query.dimensions.get.map(f => Dim(f.fieldName, f.aliasName)): _*) - if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get) - if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*) - if(query.having.nonEmpty) DQLQuery.having(getGroupByHaving(query).get) - DQLQuery.build() - } - case "topn" => { - val DQLQuery = DQL - .from(query.dataSource) - .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all"))) - .interval(CommonUtil.getIntervalRange(query.intervals)) - .topN(Dim(query.dimensions.get.head.fieldName, query.dimensions.get.head.aliasName), query.metric.getOrElse("count"), query.threshold.getOrElse(100).asInstanceOf[Int]) - .agg(getAggregation(query): _*) - if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get) - if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*) - DQLQuery.build() - } - case "timeseries" => { - val DQLQuery = DQL - .from(query.dataSource) - .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all"))) - .interval(CommonUtil.getIntervalRange(query.intervals)) - .agg(getAggregation(query): _*) - if(query.filters.nonEmpty) DQLQuery.where(getFilter(query).get) - if(query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query).get: _*) - DQLQuery.build() - } - case _ => - throw new DataFetcherException("Unknown druid query type found"); - } + } else { + val response = executeDruidQuery(query, request) + query.queryType.toLowerCase() match { + case "timeseries" | "groupby" | "topn"=> + sc.parallelize(processResult(query, response.asInstanceOf[DruidResponseTimeseriesImpl].results)) + case "scan" => + sc.parallelize(processResult (query, response.asInstanceOf[DruidScanResponse].results.flatMap(f => f.events))) + } } - def executeDruidQuery(query: DruidQuery)(implicit fc: FrameworkContext) : DruidResponse = { - val response = fc.getDruidClient().doQuery(query); - val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong - Await.result(response, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute")) - } - - def processResult(query: DruidQueryModel, result: DruidResponse) : List[String] = { - if(result.results.length > 0) { - query.queryType.toLowerCase match { - case "timeseries" | "groupby" => - val series = result.results.map { f => - f.result.asObject.get.+:("date", Json.fromString(f.timestamp.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f => - if(f._2.isNull) - (f._1 -> "unknown") - else if ("String".equalsIgnoreCase(f._2.name)) - (f._1 -> f._2.asString.get) - else if("Number".equalsIgnoreCase(f._2.name)) - { - (f._1 -> 
CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2)) - } - - else (f._1 -> f._2) - } - } - series.map(f => JSONUtils.serialize(f)) - case "topn" => - val timeMap = Map("date" -> result.results.head.timestamp.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))) - val series = result.results.map(f => f).head.result.asArray.get.map{f => - val dataMap = f.asObject.get.toMap.map{f => - if(f._2.isNull) - (f._1 -> "unknown") - else if ("String".equalsIgnoreCase(f._2.name)) - (f._1 -> f._2.asString.get) - else if("Number".equalsIgnoreCase(f._2.name)) - (f._1 -> f._2.asNumber.get.toBigDecimal.get) - else (f._1 -> f._2) - } - timeMap ++ dataMap - }.toList - series.map(f => JSONUtils.serialize(f)) - } - } - else - List(); - } - - def getAggregation(query: DruidQueryModel): List[AggregationExpression] = { - query.aggregations.getOrElse(List(org.ekstep.analytics.framework.Aggregation(None, "count", "count"))).map{f => - val aggType = AggregationType.decode(f.`type`).right.getOrElse(AggregationType.Count) - getAggregationByType(aggType, f.name, f.fieldName, f.fnAggregate, f.fnCombine, f.fnReset) - } + } + + def getDruidQuery(query: DruidQueryModel): DruidNativeQuery = { + val dims = query.dimensions.getOrElse(List()) + val druidQuery = DQL + .from(query.dataSource) + .granularity(CommonUtil.getGranularity(query.granularity.getOrElse("all"))) + .interval(getIntervals(query)) + query.queryType.toLowerCase() match { + case "groupby" => { + val DQLQuery = druidQuery.agg(getAggregation(query.aggregations): _*) + .groupBy(dims.map(f => getDimensionByType(f.`type`, f.fieldName, f.aliasName, f.outputType, f.extractionFn)): _*) + if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get) + if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*) + if (query.having.nonEmpty) DQLQuery.having(getGroupByHaving(query.having).get) + DQLQuery.build() + } + case "topn" => { + val DQLQuery = druidQuery.topN(getDimensionByType(dims.head.`type`, dims.head.fieldName, + dims.head.aliasName, dims.head.outputType, dims.head.extractionFn), + query.metric.getOrElse("count"), query.threshold.getOrElse(100).asInstanceOf[Int]) + .agg(getAggregation(query.aggregations): _*) + if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get) + if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*) + DQLQuery.build() + } + case "timeseries" => { + val DQLQuery = druidQuery.agg(getAggregation(query.aggregations): _*) + if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get) + if (query.postAggregation.nonEmpty) DQLQuery.postAgg(getPostAggregation(query.postAggregation).get: _*) + DQLQuery.build() + } + case "scan" => { + val DQLQuery = druidQuery.scan() + if (query.filters.nonEmpty) DQLQuery.where(getFilter(query.filters).get) + if (query.columns.nonEmpty) DQLQuery.columns(query.columns.get) + DQLQuery.batchSize(AppConf.getConfig("druid.scan.batch.size").toInt) + DQLQuery.setQueryContextParam("maxQueuedBytes",AppConf.getConfig("druid.scan.batch.bytes")) + DQLQuery.build() + } + + case _ => + throw new DataFetcherException("Unknown druid query type found"); } + } - def getAggregationByType(aggType: AggregationType, name: Option[String], fieldName: String, fnAggregate: Option[String], fnCombine: Option[String], fnReset: Option[String]): AggregationExpression = { - aggType match { - case AggregationType.Count => count as 
name.getOrElse(s"${AggregationType.Count.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.HyperUnique => dim(fieldName).hyperUnique as name.getOrElse(s"${AggregationType.HyperUnique.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.ThetaSketch => thetaSketch(Dim(fieldName)) as name.getOrElse(s"${AggregationType.ThetaSketch.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.Cardinality => cardinality(Dim(fieldName)) as name.getOrElse(s"${AggregationType.Cardinality.toString.toLowerCase}_${fieldName.toLowerCase()}") - case AggregationType.LongSum => longSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongSum.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.DoubleSum => doubleSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleSum.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.DoubleMax => doubleMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMax.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.DoubleMin => doubleMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMin.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.LongMax => longMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMax.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.LongMin => longMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMin.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.DoubleFirst => doubleFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.DoubleLast => doubleLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleLast.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.LongLast => longLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongLast.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.LongFirst =>longFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}") - case AggregationType.Javascript => ing.wbaa.druid.dql.AggregationOps.javascript(name.getOrElse(""), Iterable(fieldName), fnAggregate.get, fnCombine.get, fnReset.get) + def getIntervals(query: DruidQueryModel): String = { + if (query.granularity.getOrElse("all").toUpperCase == "LATEST_INDEX") { + var connection : Connection = null + var statement : Statement = null + try { + val connProperties: Properties = CommonUtil.getPostgresConnectionUserProps(AppConf.getConfig("postgres.druid.user") + , AppConf.getConfig("postgres.druid.pass")) + val db: String = AppConf.getConfig("postgres.druid.db") + val url: String = AppConf.getConfig("postgres.druid.url") + s"$db" + val getLatestIndexQuery = s"""select segment.start, segment.end from druid_segments segment where datasource = '${query.dataSource}' and used='t' order by start desc""" + connection = DriverManager.getConnection(url, connProperties) + statement = connection.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY) + val result: ResultSet = statement.executeQuery(getLatestIndexQuery) + if(result.first()) + result.getString("start") + "/" + result.getString("end") + else + CommonUtil.getIntervalRange(query.intervals, query.dataSource, query.intervalSlider) + }finally{ + statement.close() + connection.close() } + } else { + 
CommonUtil.getIntervalRange(query.intervals, query.dataSource, query.intervalSlider) } + } + + def executeDruidQuery(model: DruidQueryModel,query: DruidNativeQuery)(implicit sc: SparkContext, fc: FrameworkContext): DruidResponse = { + val response = if(query.dataSource.contains("rollup") || query.dataSource.contains("distinct") + || query.dataSource.contains("snapshot")) fc.getDruidRollUpClient().doQuery(query) + else fc.getDruidClient().doQuery(query) + val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong + Await.result(response, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute")) + + + } + + def getSQLDruidQuery(model : DruidQueryModel) : DruidSQLQuery ={ + val columns = model.sqlDimensions.get.map({f=> + if(f.function == None) + f.fieldName + else + f.function.get + "AS \"" + f.fieldName + "\"" + + }) + val intervals = CommonUtil.getIntervalRange(model.intervals, model.dataSource, model.intervalSlider) + val sqlString = "SELECT " + columns.mkString(",") + + " from \"druid\".\"" + model.dataSource + "\" where " + + "__time >= '" + intervals.split("/").apply(0).split("T").apply(0) + "' AND __time < '"+ + intervals.split("/").apply(1).split("T").apply(0) + "'" + + DruidSQLQuery(sqlString) + } + + def executeQueryAsStream(model: DruidQueryModel, query: DruidNativeQuery)(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = { + + implicit val system = if (query.dataSource.contains("rollup") || query.dataSource.contains("distinct") || query.dataSource.contains("snapshot")) + fc.getDruidRollUpClient().actorSystem + else + fc.getDruidClient().actorSystem + implicit val materializer = ActorMaterializer() + + val response = + if (query.dataSource.contains("rollup") || query.dataSource.contains("distinct") || query.dataSource.contains("snapshot")) + fc.getDruidRollUpClient().doQueryAsStream(query) + else + fc.getDruidClient().doQueryAsStream(query) + + val druidResult: Future[RDD[String]] = + response + .via(new ResultAccumulator[BaseResult]) + .map(f => processResult(model,f)) + .map(sc.parallelize(_)) + .toMat(Sink.fold[RDD[String], RDD[String]]((sc.emptyRDD[String]))(_ union _))(Keep.right).run() - def getFilter(query: DruidQueryModel): Option[FilteringExpression] = { - if (query.filters.nonEmpty) { - val filters = query.filters.get.map { f => - val values = if (f.values.isEmpty && f.value.isEmpty) List() else if (f.values.isEmpty) List(f.value.get) else f.values.get - getFilterByType(f.`type`, f.dimension, values) + val queryWaitTimeInMins = AppConf.getConfig("druid.query.wait.time.mins").toLong + Await.result(druidResult, scala.concurrent.duration.Duration.apply(queryWaitTimeInMins, "minute")) + } + + def executeSQLQuery(model: DruidQueryModel, client: AkkaHttpClient)(implicit sc: SparkContext, fc: FrameworkContext): RDD[DruidOutput] = { + + val druidQuery = getSQLDruidQuery(model) + fc.inputEventsCount = sc.longAccumulator("DruidDataCount") + implicit val system = fc.getDruidRollUpClient().actorSystem + implicit val materializer = ActorMaterializer() + implicit val ec: ExecutionContextExecutor = system.dispatcher + val url = String.format("%s://%s:%s%s%s", "http", AppConf.getConfig("druid.rollup.host"), + AppConf.getConfig("druid.rollup.port"), AppConf.getConfig("druid.url"), "sql") + val request = HttpRequest(method = HttpMethods.POST, + uri = url, + entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(druidQuery))) + val responseFuture: Future[HttpResponse] = client.sendRequest(request) + + val convertStringFlow 
= + Flow[ByteString].map(s => s.utf8String.trim) + + val result = Source.fromFuture[HttpResponse](responseFuture) + .flatMapConcat(response => response.entity.withoutSizeLimit() + .dataBytes.via(Framing.delimiter(ByteString("\n"), + AppConf.getConfig("druid.scan.batch.bytes").toInt, true))) + .via(convertStringFlow).via(new ResultAccumulator[String]) + .map(events => { + fc.inputEventsCount.add(events.filter(p=> !p.isEmpty).size) + sc.parallelize(events) + }) + .toMat(Sink.fold[RDD[String], RDD[String]]((sc.emptyRDD[String]))(_ union _))(Keep.right).run() + + val data = Await.result(result, scala.concurrent.duration.Duration. + apply(AppConf.getConfig("druid.query.wait.time.mins").toLong, "minute")) + data.filter(f => !f.isEmpty).map(f=> processSqlResult(f)) + } + + + def processResult(query: DruidQueryModel, result: Seq[BaseResult])(implicit fc: FrameworkContext): Seq[String] = { + if (result.nonEmpty) { + fc.inputEventsCount.add(result.size) + query.queryType.toLowerCase match { + case "timeseries" | "groupby" => + val series = result.asInstanceOf[List[DruidResult]].map { f => + f.result.asObject.get.+:("date", Json.fromString(f.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f => + if (f._2.isNull) + (f._1 -> "unknown") + else if ("String".equalsIgnoreCase(f._2.name)) + (f._1 -> f._2.asString.get) + else if ("Number".equalsIgnoreCase(f._2.name)) { + (f._1 -> CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2)) + } else (f._1 -> f._2) } - Option(conjunction(filters: _*)) - } - else None + } + series.map(f => JSONUtils.serialize(f)) + case "topn" => + val timeMap = Map("date" -> result.head.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))) + val series = result.asInstanceOf[List[DruidResult]].map(f => f).head.result.asArray.get.map { f => + val dataMap = f.asObject.get.toMap.map { f => + if (f._2.isNull) + (f._1 -> "unknown") + else if ("String".equalsIgnoreCase(f._2.name)) + (f._1 -> f._2.asString.get) + else if ("Number".equalsIgnoreCase(f._2.name)) + (f._1 -> f._2.asNumber.get.toBigDecimal.get) + else (f._1 -> f._2) + } + timeMap ++ dataMap + }.toList + series.map(f => JSONUtils.serialize(f)) + case "scan"=> + val series = result.toList.asInstanceOf[List[DruidScanResult]].map { f => + f.result.asObject.get.+:("date", Json.fromString(f.timestamp.get.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")))).toMap.map { f => + if (f._2.isNull) + (f._1 -> "unknown") + else if ("String".equalsIgnoreCase(f._2.name)) + (f._1 -> f._2.asString.get) + else if ("Number".equalsIgnoreCase(f._2.name)) { + (f._1 -> CommonUtil.roundDouble(f._2.asNumber.get.toDouble, 2)) + } else { + (f._1 -> JSONUtils.deserialize[Map[String,Any]](JSONUtils.serialize(f._2)).get("value").get) + } + } + } + series.map(f => JSONUtils.serialize(f)) + } + } else + List(); + } + def processSqlResult(result: String): DruidOutput = { + + val finalResult = JSONUtils.deserialize[Map[String,Any]](result) + val finalMap =finalResult.map(m => { + if(m._2== null) + (m._1, "unknown") + else if (m._2.isInstanceOf[String]) + (m._1, if(m._2.toString.isEmpty) "unknown" else m._2) + else (m._1,m._2)}) + DruidOutput(finalMap) + } + + def getAggregation(aggregations: Option[List[org.ekstep.analytics.framework.Aggregation]]): List[AggregationExpression] = { + aggregations.getOrElse(List(org.ekstep.analytics.framework.Aggregation(None, "count", "count"))).map { f => + val aggType = AggregationType.decode(f.`type`).right.getOrElse(AggregationType.Count) + getAggregationByType(aggType, f.name, 
f.fieldName, f.fnAggregate, f.fnCombine, f.fnReset, f.lgK, f.tgtHllType, f.round, f.filterAggType, f.filterFieldName, f.filterValue) } + } - def getFilterByType(filterType: String, dimension: String, values: List[AnyRef]): FilteringExpression = { - filterType.toLowerCase match { - case "isnull" => Dim(dimension).isNull - case "isnotnull" => Dim(dimension).isNotNull - case "equals" => Dim(dimension) === values.head.asInstanceOf[String] - case "notequals" => Dim(dimension) =!= values.head.asInstanceOf[String] - case "containsignorecase" => Dim(dimension).containsIgnoreCase(values.head.asInstanceOf[String]) - case "contains" => Dim(dimension).contains(values.head.asInstanceOf[String], true) - case "in" => Dim(dimension) in values.asInstanceOf[List[String]] - case "notin" => Dim(dimension) notIn values.asInstanceOf[List[String]] - case "regex" => Dim(dimension) regex values.head.asInstanceOf[String] - case "like" => Dim(dimension) like values.head.asInstanceOf[String] - case "greaterthan" => Dim(dimension).between(values.head.asInstanceOf[Number].doubleValue(), Integer.MAX_VALUE, true, false) - case "lessthan" => Dim(dimension).between(0, values.head.asInstanceOf[Number].doubleValue(), false, true) - } + def getAggregationByType(aggType: AggregationType, name: Option[String], fieldName: String, fnAggregate: Option[String] = None, fnCombine: Option[String] = None, fnReset: Option[String] = None, lgk: Option[Int] = None, tgtHllType: Option[String] = None, round: Option[Boolean] = None, filterAggType: Option[String] = None, filterFieldName: Option[String] = None, filterValue: Option[AnyRef] = None): AggregationExpression = { + aggType match { + case AggregationType.Count => count as name.getOrElse(s"${AggregationType.Count.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.HyperUnique => dim(fieldName).hyperUnique as name.getOrElse(s"${AggregationType.HyperUnique.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.ThetaSketch => thetaSketch(Dim(fieldName)) as name.getOrElse(s"${AggregationType.ThetaSketch.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.Cardinality => cardinality(Dim(fieldName)) as name.getOrElse(s"${AggregationType.Cardinality.toString.toLowerCase}_${fieldName.toLowerCase()}") + case AggregationType.LongSum => longSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongSum.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.DoubleSum => doubleSum(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleSum.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.DoubleMax => doubleMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMax.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.DoubleMin => doubleMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleMin.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.LongMax => longMax(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMax.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.LongMin => longMin(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongMin.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.DoubleFirst => doubleFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.DoubleFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.DoubleLast => doubleLast(Dim(fieldName)) as 
name.getOrElse(s"${AggregationType.DoubleLast.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.LongLast => longLast(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongLast.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.LongFirst => longFirst(Dim(fieldName)) as name.getOrElse(s"${AggregationType.LongFirst.toString.toLowerCase()}_${fieldName.toLowerCase()}") + case AggregationType.Javascript => ing.wbaa.druid.dql.AggregationOps.javascript(name.getOrElse(""), Iterable(fieldName), fnAggregate.get, fnCombine.get, fnReset.get) + case AggregationType.HLLSketchMerge => ing.wbaa.druid.dql.AggregationOps.hllAggregator(fieldName, name.getOrElse(s"${AggregationType.HLLSketchMerge.toString.toLowerCase()}_${fieldName.toLowerCase()}"), lgk.getOrElse(12), tgtHllType.getOrElse("HLL_4"), round.getOrElse(true)) + case AggregationType.Filtered => getFilteredAggregationByType(filterAggType, name, fieldName, filterFieldName, filterValue) + // case _ => throw new Exception("Unsupported aggregation type") } + } - def getPostAggregation(query: DruidQueryModel): Option[List[PostAggregationExpression]] = { - if (query.postAggregation.nonEmpty) { - Option(query.postAggregation.get.map { f => - PostAggregationType.decode(f.`type`) match { - case Right(x) => getPostAggregationByType(x, f.name, f.fields, f.fn) - case Left(x) => throw x - } - }) - } - else None + def getFilteredAggregationByType(aggType: Option[String], name: Option[String], fieldName: String, filterFieldName: Option[String], filterValue: Option[AnyRef]): AggregationExpression = { + if (aggType.nonEmpty || filterFieldName.nonEmpty || filterValue.nonEmpty) + ing.wbaa.druid.dql.AggregationOps.selectorFiltered(filterFieldName.get, getAggregationByType(AggregationType.decode(aggType.get).right.get, name, fieldName), filterValue.get.toString) + else + throw new DataFetcherException("Missing fields for filter type aggregation"); + } + + def getFilter(filters: Option[List[DruidFilter]]): Option[FilteringExpression] = { + + if (filters.nonEmpty) { + val filterExprs = filters.get.map { f => + val values = if (f.values.isEmpty && f.value.isEmpty) List() else if (f.values.isEmpty) List(f.value.get) else f.values.get + getFilterByType(f.`type`, f.dimension, values) + } + Option(conjunction(filterExprs: _*)) + } else None + + } + + def getFilterByType(filterType: String, dimension: String, values: List[AnyRef]): FilteringExpression = { + filterType.toLowerCase match { + case "isnull" => Dim(dimension).isNull + case "isnotnull" => Dim(dimension).isNotNull + case "equals" => Dim(dimension) === values.head.asInstanceOf[String] + case "notequals" => Dim(dimension) =!= values.head.asInstanceOf[String] + case "containsignorecase" => Dim(dimension).containsIgnoreCase(values.head.asInstanceOf[String]) + case "contains" => Dim(dimension).contains(values.head.asInstanceOf[String], true) + case "in" => Dim(dimension) in values.asInstanceOf[List[String]] + case "notin" => Dim(dimension) notIn values.asInstanceOf[List[String]] + case "regex" => Dim(dimension) regex values.head.asInstanceOf[String] + case "like" => Dim(dimension) like values.head.asInstanceOf[String] + case "greaterthan" => Dim(dimension).between(values.head.asInstanceOf[Number].doubleValue(), Integer.MAX_VALUE, true, false) + case "lessthan" => Dim(dimension).between(0, values.head.asInstanceOf[Number].doubleValue(), false, true) + case _ => throw new Exception("Unsupported filter type") } + } + + def getPostAggregation(postAggregation: 
Option[List[org.ekstep.analytics.framework.PostAggregation]]): Option[List[PostAggregationExpression]] = { + if (postAggregation.nonEmpty) { + Option(postAggregation.get.map { f => + PostAggregationType.decode(f.`type`) match { + case Right(x) => getPostAggregationByType(x, f.name, f.fields, f.fn) + case Left(x) => throw x + } + }) + } else None + } - def getPostAggregationByType(postAggType: PostAggregationType, name: String, fields: PostAggregationFields, fn: String): PostAggregationExpression = { - postAggType match { - case PostAggregationType.Arithmetic => - fn match { - // only right field can have type as Constant or FieldAccess - case "+" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).+(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).+(Dim(fields.rightField.asInstanceOf[String])) as name - case "-" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).-(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).-(Dim(fields.rightField.asInstanceOf[String])) as name - case "*" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).*(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).*(Dim(fields.rightField.asInstanceOf[String])) as name - case "/" => if("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField)./(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField)./(Dim(fields.rightField.asInstanceOf[String])) as name - } - case PostAggregationType.Javascript => javascript(name, Seq(Dim(fields.leftField),Dim(fields.rightField.asInstanceOf[String])), fn) + def getPostAggregationByType(postAggType: PostAggregationType, name: String, fields: PostAggregationFields, fn: String): PostAggregationExpression = { + postAggType match { + case PostAggregationType.Arithmetic => + fn match { + // only right field can have type as Constant or FieldAccess + case "+" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).+(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).+(Dim(fields.rightField.asInstanceOf[String])) as name + case "-" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).-(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).-(Dim(fields.rightField.asInstanceOf[String])) as name + case "*" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField).*(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField).*(Dim(fields.rightField.asInstanceOf[String])) as name + case "/" => if ("constant".equalsIgnoreCase(fields.rightFieldType)) Dim(fields.leftField)./(fields.rightField.asInstanceOf[Number].doubleValue()) as name else Dim(fields.leftField)./(Dim(fields.rightField.asInstanceOf[String])) as name } + case PostAggregationType.Javascript => + if(fields.rightField.asInstanceOf[String].isEmpty) javascript(name, Seq(Dim(fields.leftField)), fn) + else javascript(name, Seq(Dim(fields.leftField), Dim(fields.rightField.asInstanceOf[String])), fn) + case _ => throw new Exception("Unsupported post aggregation type") } + } - def getGroupByHaving(query: DruidQueryModel): Option[FilteringExpression] = { + def getGroupByHaving(having: Option[DruidHavingFilter]): Option[FilteringExpression] = { - if (query.having.nonEmpty) { - HavingType.decode(query.having.get.`type`) match { - 
case Right(x) => Option(getGroupByHavingByType(x, query.having.get.aggregation, query.having.get.value)) - case Left(x) => throw x - } - } - else None + if (having.nonEmpty) { + HavingType.decode(having.get.`type`) match { + case Right(x) => Option(getGroupByHavingByType(x, having.get.aggregation, having.get.value)) + case Left(x) => throw x + } + } else None + } + + def getGroupByHavingByType(postAggType: HavingType, field: String, value: AnyRef): FilteringExpression = { + postAggType match { + case HavingType.EqualTo => Dim(field) === value.asInstanceOf[String] + case HavingType.Not => Dim(field) =!= value.asInstanceOf[String] + case HavingType.GreaterThan => Dim(field) > value.asInstanceOf[Number].doubleValue() + case HavingType.LessThan => Dim(field) < value.asInstanceOf[Number].doubleValue() + case _ => throw new Exception("Unsupported group by having type") } + } - def getGroupByHavingByType(postAggType: HavingType, field: String, value: AnyRef): FilteringExpression = { - postAggType match { - case HavingType.EqualTo => Dim(field) === value.asInstanceOf[String] - case HavingType.Not => Dim(field) =!= value.asInstanceOf[String] - case HavingType.GreaterThan => Dim(field) > value.asInstanceOf[Number].doubleValue() - case HavingType.LessThan => Dim(field) < value.asInstanceOf[Number].doubleValue() - } + def getDimensionByType(`type`: Option[String], fieldName: String, aliasName: Option[String], outputType: Option[String] = None, extractionFn: Option[List[ExtractFn]] = None): Dim = { + `type`.getOrElse("default").toLowerCase match { + case "default" => Dim(fieldName, aliasName) + case "extraction" => Dim(fieldName,aliasName,outputType).extract(getExtractionFn(extractionFn.get.head)) + case "cascade" => Dim(fieldName, aliasName, outputType).extract(CascadeExtractionFn(Seq(extractionFn.get.map(f => getExtractionFn(f)): _*))) } -} + } + def getExtractionFn(extractionFunc: ExtractFn): ExtractionFn = { + extractionFunc.`type`.toLowerCase match { + case "javascript" => JavascriptExtractionFn(extractionFunc.fn).asInstanceOf[ExtractionFn] + case "registeredlookup" => RegisteredLookupExtractionFn(extractionFunc.fn, extractionFunc.retainMissingValue, extractionFunc.replaceMissingValueWith).asInstanceOf[ExtractionFn] + } + } +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala index 3b780cb0..9fcf5843 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/CommonUtil.scala @@ -2,14 +2,15 @@ package org.ekstep.analytics.framework.util import java.io._ import java.net.URL +import java.nio.file.Files import java.nio.file.Paths.get -import java.nio.file.{Files, Paths, StandardCopyOption} import java.security.MessageDigest import java.sql.Timestamp -import java.util.{Date, Properties} import java.util.zip.GZIPOutputStream +import java.util.{Date, Properties} import ing.wbaa.druid.definitions.{Granularity, GranularityType} +import org.apache.hadoop.conf.Configuration import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} import org.ekstep.analytics.framework.Level._ @@ -24,7 +25,6 @@ import org.apache.commons.lang3.StringUtils import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} import org.joda.time.{DateTime, DateTimeZone, Days, LocalDate, Weeks, Years} import 
org.sunbird.cloud.storage.conf.AppConf - import scala.util.control.Breaks._ object CommonUtil { @@ -39,6 +39,7 @@ object CommonUtil { @transient val dayPeriod: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMMdd").withZone(DateTimeZone.forOffsetHoursMinutes(5, 30)); @transient val monthPeriod: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMM").withZone(DateTimeZone.forOffsetHoursMinutes(5, 30)); @transient val dayPeriodFormat: DateTimeFormatter = DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC(); + val offset: Long = DateTimeZone.forID("Asia/Kolkata").getOffset(DateTime.now()) def getParallelization(config: JobConfig): Int = { @@ -52,7 +53,9 @@ object CommonUtil { fc; } - def getSparkContext(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None, sparkElasticsearchConnectionHost: Option[AnyRef] = None): SparkContext = { + def getSparkContext(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None, + sparkElasticsearchConnectionHost: Option[AnyRef] = None, sparkRedisConnectionHost: Option[AnyRef] = None, + sparkRedisDB: Option[AnyRef] = None, sparkRedisPort: Option[AnyRef] = Option("6379")): SparkContext = { JobLogger.log("Initializing Spark Context") val conf = new SparkConf().setAppName(appName).set("spark.default.parallelism", parallelization.toString) .set("spark.driver.memory", AppConf.getConfig("spark.driver_memory")) @@ -67,6 +70,7 @@ object CommonUtil { if (!conf.contains("spark.cassandra.connection.host")) conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host")) + // $COVERAGE-ON$ if (sparkCassandraConnectionHost.nonEmpty) { conf.set("spark.cassandra.connection.host", sparkCassandraConnectionHost.get.asInstanceOf[String]) @@ -80,7 +84,12 @@ object CommonUtil { conf.set("es.write.rest.error.handler.log.logger.level", "INFO") } - // $COVERAGE-ON$ + if(sparkRedisConnectionHost.nonEmpty && sparkRedisDB.nonEmpty) { + conf.set("spark.redis.host", sparkRedisConnectionHost.get.asInstanceOf[String]) + conf.set("spark.redis.port", sparkRedisPort.get.asInstanceOf[String]) + conf.set("spark.redis.db", sparkRedisDB.get.asInstanceOf[String]) + } + val sc = new SparkContext(conf) setS3Conf(sc) setAzureConf(sc) @@ -89,12 +98,17 @@ object CommonUtil { } def getSparkSession(parallelization: Int, appName: String, sparkCassandraConnectionHost: Option[AnyRef] = None, - sparkElasticsearchConnectionHost: Option[AnyRef] = None, readConsistencyLevel: Option[String] = None): SparkSession = { + sparkElasticsearchConnectionHost: Option[AnyRef] = None, readConsistencyLevel: Option[String] = None, + sparkRedisConnectionHost: Option[AnyRef] = None, sparkRedisDB: Option[AnyRef] = None, + sparkRedisPort: Option[AnyRef] = Option("6379")): SparkSession = { JobLogger.log("Initializing SparkSession") val conf = new SparkConf().setAppName(appName).set("spark.default.parallelism", parallelization.toString) .set("spark.driver.memory", AppConf.getConfig("spark.driver_memory")) .set("spark.memory.fraction", AppConf.getConfig("spark.memory_fraction")) .set("spark.memory.storageFraction", AppConf.getConfig("spark.storage_fraction")) + .set("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions") + .set("directJoinSetting", "on") + val master = conf.getOption("spark.master") // $COVERAGE-OFF$ Disabling scoverage as the below code cannot be covered as they depend on environment variables if (master.isEmpty) { @@ -103,14 +117,13 @@ object CommonUtil { } if 
(!conf.contains("spark.cassandra.connection.host")) - conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host")) - if (embeddedCassandraMode) - conf.set("spark.cassandra.connection.port", AppConf.getConfig("cassandra.service.embedded.connection.port")) + conf.set("spark.cassandra.connection.host", AppConf.getConfig("spark.cassandra.connection.host")) + // $COVERAGE-ON$ if (sparkCassandraConnectionHost.nonEmpty) { conf.set("spark.cassandra.connection.host", sparkCassandraConnectionHost.get.asInstanceOf[String]) if (readConsistencyLevel.nonEmpty) { - conf.set("spark.cassandra.input.consistency.level", readConsistencyLevel.get); + conf.set("spark.cassandra.input.consistency.level", readConsistencyLevel.get) } println("setting spark.cassandra.connection.host to lp-cassandra", conf.get("spark.cassandra.connection.host")) } @@ -121,10 +134,14 @@ object CommonUtil { conf.set("es.write.rest.error.handler.log.logger.name", "org.ekstep.es.dispatcher") conf.set("es.write.rest.error.handler.log.logger.level", "INFO") conf.set("es.write.operation", "upsert") + } + if(sparkRedisConnectionHost.nonEmpty && sparkRedisDB.nonEmpty) { + conf.set("spark.redis.host", sparkRedisConnectionHost.get.asInstanceOf[String]) + conf.set("spark.redis.port", sparkRedisPort.get.asInstanceOf[String]) + conf.set("spark.redis.db", sparkRedisDB.get.asInstanceOf[String]) } - // $COVERAGE-ON$ val sparkSession = SparkSession.builder().appName("sunbird-analytics").config(conf).getOrCreate() setS3Conf(sparkSession.sparkContext) setAzureConf(sparkSession.sparkContext) @@ -132,11 +149,6 @@ object CommonUtil { sparkSession } - private def embeddedCassandraMode(): Boolean = { - val isEmbedded = AppConf.getConfig("cassandra.service.embedded.enable"); - StringUtils.isNotBlank(isEmbedded) && StringUtils.equalsIgnoreCase("true", isEmbedded); - } - def setS3Conf(sc: SparkContext) = { JobLogger.log("Configuring S3 AccessKey& SecrateKey to SparkContext") sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", AppConf.getAwsKey()); @@ -148,6 +160,7 @@ object CommonUtil { val accKey = AppConf.getStorageSecret("azure") sc.hadoopConfiguration.set("fs.azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem") sc.hadoopConfiguration.set("fs.azure.account.key." + accName + ".blob.core.windows.net", accKey) + sc.hadoopConfiguration.set("fs.azure.account.keyprovider." 
+ accName + ".blob.core.windows.net", "org.apache.hadoop.fs.azure.SimpleKeyProvider") } def closeSparkContext()(implicit sc: SparkContext) { @@ -191,11 +204,6 @@ object CommonUtil { Files.createDirectories(path); } - def copyFile(from: InputStream, path: String, fileName: String) = { - createDirectory(path); - Files.copy(from, Paths.get(path + fileName), StandardCopyOption.REPLACE_EXISTING); - } - def deleteFile(file: String) { JobLogger.log("Deleting file ", Option(file)) val path = get(file); @@ -280,14 +288,6 @@ object CommonUtil { if (event.gdata != null) event.gdata.ver else null; } - def getGameId(event: V3Event): String = { - if (event.`object`.isEmpty) null else event.`object`.get.id; - } - - def getGameVersion(event: V3Event): String = { - if (event.`object`.isEmpty) null else event.`object`.get.ver.getOrElse(null); - } - def getParallelization(config: Option[Map[String, String]]): Int = { getParallelization(config.getOrElse(Map[String, String]())); } @@ -350,26 +350,6 @@ object CommonUtil { zip.close() } - def zipFolder(outFile: String, dir: String) = { - import java.io.{BufferedInputStream, FileInputStream, FileOutputStream} - import java.util.zip.{ZipEntry, ZipOutputStream} - - val zip = new ZipOutputStream(new FileOutputStream(outFile)) - val files = new File(dir).listFiles(); - files.foreach { file => - zip.putNextEntry(new ZipEntry(file.getName.split("/").last)) - val in = new BufferedInputStream(new FileInputStream(file)) - var b = in.read() - while (b > -1) { - zip.write(b) - b = in.read() - } - in.close() - zip.closeEntry() - } - zip.close() - } - // zipping nested directories def zipDir(zipFileName: String, dir: String) { val dirObj = new File(dir); @@ -593,32 +573,6 @@ object CommonUtil { x.toArray; } - def getValidTags(event: Any, registeredTags: Array[String]): Array[String] = { - - val appTag = if (event.isInstanceOf[DerivedEvent]) { - event.asInstanceOf[DerivedEvent].etags.get.app - } else if (event.isInstanceOf[Event]) { - getETags(event.asInstanceOf[Event]).app - } else if (event.isInstanceOf[V3Event]) { - getETags(event.asInstanceOf[V3Event]).app - } else { - None - } - val dimTag = if (event.isInstanceOf[DerivedEvent]) { - event.asInstanceOf[DerivedEvent].etags.get.dims - } else if (event.isInstanceOf[Event]) { - getETags(event.asInstanceOf[Event]).dims - } else if (event.isInstanceOf[V3Event]) { - getETags(event.asInstanceOf[V3Event]).dims - } else { - None - } - val genieTagFilter = if (appTag.isDefined) appTag.get else List() - val dimTagFilter = if (dimTag.isDefined) dimTag.get else List() - val tagFilter = genieTagFilter ++ dimTagFilter - tagFilter.filter { x => registeredTags.contains(x) }.toArray; - } - def getValidTagsForWorkflow(event: DerivedEvent, registeredTags: Array[String]): Array[String] = { val tagFilter = if (event.tags != null && !event.tags.isEmpty) { event.tags.get.asInstanceOf[List[String]] } else List() tagFilter.filter { x => registeredTags.contains(x) }.toArray; @@ -664,7 +618,7 @@ object CommonUtil { if (event.isInstanceOf[Event]) { if (event.asInstanceOf[Event].channel.nonEmpty && StringUtils.isNotBlank(event.asInstanceOf[Event].channel.get)) event.asInstanceOf[Event].channel.get else defaultChannelId } else if (event.isInstanceOf[V3Event]) { - if (event.asInstanceOf[V3Event].context.channel.nonEmpty && StringUtils.isNotBlank(event.asInstanceOf[V3Event].context.channel)) event.asInstanceOf[V3Event].context.channel else defaultChannelId + if (StringUtils.isNotBlank(event.asInstanceOf[V3Event].context.channel)) 
event.asInstanceOf[V3Event].context.channel else defaultChannelId } else if (event.isInstanceOf[DerivedEvent]) { if (event.asInstanceOf[DerivedEvent].dimensions.channel.nonEmpty) event.asInstanceOf[DerivedEvent].dimensions.channel.get else if (StringUtils.isBlank(event.asInstanceOf[DerivedEvent].channel)) defaultChannelId else event.asInstanceOf[DerivedEvent].channel } else if (event.isInstanceOf[ProfileEvent]) { @@ -672,40 +626,6 @@ object CommonUtil { } else defaultChannelId; } - def getETags(event: Event): ETags = { - if (event.etags.isDefined) { - event.etags.get; - } else { - if (event.tags != null) { - val tags = event.tags.asInstanceOf[List[Map[String, List[String]]]] - val genieTags = tags.filter(f => f.contains("genie")).map { x => x.get("genie").get }.flatMap { x => x } - val partnerTags = tags.filter(f => f.contains("partner")).map { x => x.get("partner").get }.flatMap { x => x } - val dims = tags.filter(f => f.contains("dims")).map { x => x.get("dims").get }.flatMap { x => x } - ETags(Option(genieTags), Option(partnerTags), Option(dims)) - } else { - ETags() - } - - } - } - - def getETags(event: V3Event): ETags = { - if (event.tags != null && !event.tags.isEmpty) { - val first = event.tags.apply(0) - if (first.isInstanceOf[String]) { - ETags(Option(event.tags.asInstanceOf[List[String]])) - } else { - val tags = event.tags.asInstanceOf[List[Map[String, List[String]]]] - val genieTags = tags.filter(f => f.contains("genie")).map { x => x.get("genie").get }.flatMap { x => x } - val partnerTags = tags.filter(f => f.contains("partner")).map { x => x.get("partner").get }.flatMap { x => x } - val dims = tags.filter(f => f.contains("dims")).map { x => x.get("dims").get }.flatMap { x => x } - ETags(Option(genieTags), Option(partnerTags), Option(dims)) - } - } else { - ETags() - } - } - def dayPeriodToLong(period: Int): Long = { val p = period.toString() if (8 == p.length()) { @@ -725,40 +645,45 @@ object CommonUtil { } // parse druid query interval - def getIntervalRange(period: String): String = { + def getIntervalRange(period: String, dataSource: String, intervalSlider: Int = 0): String = { // LastDay, LastWeek, LastMonth, Last7Days, Last30Days period match { - case "LastDay" => getDayRange(1); + case "LastDay" => getDayRange(1, dataSource, intervalSlider); case "LastWeek" => getWeekRange(1); case "LastMonth" => getMonthRange(1); - case "Last7Days" => getDayRange(7); - case "Last30Days" => getDayRange(30); + case "Last7Days" => getDayRange(7, dataSource, intervalSlider); + case "Last30Days" => getDayRange(30, dataSource, intervalSlider); case _ => period; } } - def getDayRange(count: Int): String = { - val endDate = DateTime.now(DateTimeZone.UTC); - val startDate = endDate.minusDays(count).toString("yyyy-MM-dd"); - startDate + "/" + endDate.toString("yyyy-MM-dd") + def getDayRange(count: Int, dataSource: String, intervalSlider: Int): String = { + val endDate = if(dataSource.contains("rollup") || dataSource.contains("distinct")) DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().minusDays(intervalSlider) else DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().minusDays(intervalSlider).plus(offset) + val startDate = endDate.minusDays(count).toString("yyyy-MM-dd'T'HH:mm:ssZZ"); + startDate + "/" + endDate.toString("yyyy-MM-dd'T'HH:mm:ssZZ") } def getMonthRange(count: Int): String = { - val currentDate = DateTime.now(DateTimeZone.UTC); - val startDate = currentDate.minusDays(count * 30).dayOfMonth().withMinimumValue().toString("yyyy-MM-dd"); - val endDate = 
currentDate.dayOfMonth().withMinimumValue().toString("yyyy-MM-dd"); + val currentDate = DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().plus(offset); + val startDate = currentDate.minusDays(count * 30).dayOfMonth().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ"); + val endDate = currentDate.dayOfMonth().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ"); startDate + "/" + endDate } def getWeekRange(count: Int): String = { - val currentDate = DateTime.now(DateTimeZone.UTC); - val startDate = currentDate.minusDays(count * 7).dayOfWeek().withMinimumValue().toString("yyyy-MM-dd") - val endDate = currentDate.dayOfWeek().withMinimumValue().toString("yyyy-MM-dd"); + val currentDate = DateTime.now(DateTimeZone.UTC).withTimeAtStartOfDay().plus(offset); + val startDate = currentDate.minusDays(count * 7).dayOfWeek().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ") + val endDate = currentDate.dayOfWeek().withMinimumValue().toString("yyyy-MM-dd'T'HH:mm:ssZZ"); startDate + "/" + endDate } def getGranularity(value: String): Granularity = { - GranularityType.decode(value).right.getOrElse(GranularityType.All) + value.toLowerCase match { + case "latest_index" => + GranularityType.decode("all").right.getOrElse(GranularityType.All) + case _ => + GranularityType.decode(value).right.getOrElse(GranularityType.All) + } } def getMetricEvent(params: Map[String, AnyRef], producerId: String, producerPid: String): V3DerivedEvent = { @@ -777,9 +702,46 @@ object CommonUtil { } def getPostgresConnectionProps(): Properties = { + val connProperties = new Properties() val user = AppConf.getConfig("postgres.user") val pass = AppConf.getConfig("postgres.pass") + connProperties.setProperty("driver", "org.postgresql.Driver") + connProperties.setProperty("user", user) + connProperties.setProperty("password", pass) + connProperties + } + + def getS3File(bucket: String, file: String): String = { + "s3n://" + bucket + "/" + file; + } + + def getS3FileWithoutPrefix(bucket: String, file: String): String = { + bucket + "/" + file; + } + + def getAzureFile(bucket: String, file: String, storageKey: String = "azure_storage_key"): String = { + "wasb://" + bucket + "@" + AppConf.getConfig(storageKey) + ".blob.core.windows.net/" + file; + } + + def getAzureFileWithoutPrefix(bucket: String, file: String, storageKey: String = "azure_storage_key"): String = { + bucket + "@" + AppConf.getConfig(storageKey) + ".blob.core.windows.net/" + file; + } + + def setStorageConf(store: String, accountKey: Option[String], accountSecret: Option[String])(implicit sc: SparkContext): Configuration = { + store.toLowerCase() match { + case "s3" => + sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", AppConf.getConfig(accountKey.getOrElse("aws_storage_key"))); + sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", AppConf.getConfig(accountSecret.getOrElse("aws_storage_secret"))); + case "azure" => + sc.hadoopConfiguration.set("fs.azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem") + sc.hadoopConfiguration.set("fs.azure.account.key." 
+ AppConf.getConfig(accountKey.getOrElse("azure_storage_key")) + ".blob.core.windows.net", AppConf.getConfig(accountSecret.getOrElse("azure_storage_secret"))) + case _ => + // Do nothing + } + sc.hadoopConfiguration + } + def getPostgresConnectionUserProps(user:String,pass: String): Properties = { val connProperties = new Properties() connProperties.setProperty("driver", "org.postgresql.Driver") connProperties.setProperty("user", user) diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala new file mode 100644 index 00000000..4006f498 --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/DatasetUtil.scala @@ -0,0 +1,86 @@ +package org.ekstep.analytics.framework.util + +import java.nio.file.Paths + +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.functions.col +import org.ekstep.analytics.framework.StorageConfig + +class DatasetExt(df: Dataset[Row]) { + + private val fileUtil = new HadoopFileUtil(); + + private def getTempDir(filePrefix: String, reportId: String): String = { + Paths.get(filePrefix, reportId, "/_tmp/").toString() + } + + private def getFinalDir(filePrefix: String, reportId: String): String = { + Paths.get(filePrefix, reportId).toString(); + } + + private def filePaths(dims: Seq[String], row: Row, format: String, tempDir: String, finalDir: String): (String, String) = { + + val dimPaths = for(dim <- dims) yield { + dim + "=" + row.get(row.fieldIndex(dim)) + } + + val paths = for(dim <- dims) yield { + row.get(row.fieldIndex(dim)) + } + + (Paths.get(tempDir, dimPaths.mkString("/")).toString(), Paths.get(finalDir, paths.mkString("/")) + "." + format) + } + + def saveToBlobStore(storageConfig: StorageConfig, format: String, reportId: String, options: Option[Map[String, String]], partitioningColumns: Option[Seq[String]]): List[String] = { + + val conf = df.sparkSession.sparkContext.hadoopConfiguration; + + val file = storageConfig.store.toLowerCase() match { + case "s3" => + CommonUtil.getS3FileWithoutPrefix(storageConfig.container, storageConfig.fileName); + case "azure" => + CommonUtil.getAzureFileWithoutPrefix(storageConfig.container, storageConfig.fileName, storageConfig.accountKey.getOrElse("azure_storage_key")) + case _ => + storageConfig.fileName + } + + val filePrefix = storageConfig.store.toLowerCase() match { + case "s3" => + "s3n://" + case "azure" => + "wasb://" + case _ => + "" + } + + val tempDir = getTempDir(file, reportId); + val finalDir = getFinalDir(file, reportId); + + val dims = partitioningColumns.getOrElse(Seq()); + + fileUtil.delete(conf, filePrefix + tempDir) + val opts = options.getOrElse(Map()); + val files = if(dims.nonEmpty) { + val map = df.select(dims.map(f => col(f)):_*).distinct().collect().map(f => filePaths(dims, f, format, tempDir, finalDir)).toMap + df.repartition(1).write.format(format).options(opts).partitionBy(dims: _*).save(filePrefix + tempDir); + map.foreach(f => { + fileUtil.delete(conf, filePrefix + f._2) + fileUtil.copyMerge(filePrefix + f._1, filePrefix + f._2, conf, true); + }) + map.map(f => filePrefix + f._2).toList + } else { + df.repartition(1).write.format(format).options(opts).save(filePrefix + tempDir); + fileUtil.delete(conf, filePrefix + finalDir + "." + format) + fileUtil.copyMerge(filePrefix + tempDir, filePrefix + finalDir + "." + format, conf, true); + List(filePrefix + finalDir + "." 
+ format) + } + fileUtil.delete(conf, filePrefix + tempDir) + files + } + +} + +object DatasetUtil { + implicit def extensions(df: Dataset[Row]) = new DatasetExt(df); + +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala new file mode 100644 index 00000000..29c64f54 --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/HadoopFileUtil.scala @@ -0,0 +1,69 @@ +package org.ekstep.analytics.framework.util + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} +import org.apache.hadoop.io.IOUtils + +import scala.util.Try + +class HadoopFileUtil { + + /** + * Delete a single file. + */ + def delete(file: String, conf: Configuration) : Boolean = { + + val path = new Path(file); + val fileSystem = path.getFileSystem(conf); + fileSystem.delete(path, true); + } + + def copy(srcFile: String, destFile: String, conf: Configuration) : String = { + + val src = new Path(srcFile); + val fileSystem = src.getFileSystem(conf); + fileSystem.copyToLocalFile(false, src, new Path(destFile)) + destFile + } + + /** + * Delete multiple files. Different file sources (aws, azure etc) can be passed here + */ + def delete(conf: Configuration, files: String*) : Seq[Boolean] = { + + for(file <- files) yield { + val path = new Path(file); + path.getFileSystem(conf).delete(path, true); + } + + } + + /** + * Merge a hadoop source folder/file into another file + */ + def copyMerge(srcPath: String, destPath: String, conf: Configuration, deleteSrc: Boolean) { + + val srcFilePath = new Path(srcPath); + val destFilePath = new Path(destPath); + copyMerge(srcFilePath.getFileSystem(conf), srcFilePath, destFilePath.getFileSystem(conf), destFilePath, deleteSrc, conf) + } + + def copyMerge(srcFS: FileSystem, srcDir: Path, dstFS: FileSystem, dstFile: Path, + deleteSource: Boolean, conf: Configuration): Boolean = { + + if (srcFS.exists(srcDir) && srcFS.getFileStatus(srcDir).isDirectory) { + val outputFile = dstFS.create(dstFile) + Try { + srcFS.listStatus(srcDir).sortBy(_.getPath.getName) + .collect { + case status if status.isFile() => + val inputFile = srcFS.open(status.getPath()) + Try(IOUtils.copyBytes(inputFile, outputFile, conf, false)) + inputFile.close() + } + } + outputFile.close() + if (deleteSource) srcFS.delete(srcDir, true) else true + } else false + } +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala index b3fe63b7..cd46cab5 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JSONUtils.scala @@ -30,16 +30,10 @@ object JSONUtils { @throws(classOf[Exception]) def deserialize[T: Manifest](value: String): T = mapper.readValue(value, typeReference[T]); - @throws(classOf[Exception]) - def unescapeJSON(string: String): String = { - StringEscapeUtils.unescapeJava(string) - } - private[this] def typeReference[T: Manifest] = new TypeReference[T] { override def getType = typeFromManifest(manifest[T]) } - private[this] def typeFromManifest(m: Manifest[_]): Type = { if (m.typeArguments.isEmpty) { m.runtimeClass } // $COVERAGE-OFF$Disabling scoverage as this code is impossible to test diff --git 
a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala index 2c3a2459..02fac8ce 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/JobLogger.scala @@ -13,10 +13,13 @@ import org.apache.logging.log4j.core.layout.PatternLayout import java.nio.charset.Charset import org.apache.logging.log4j.core.config.AppenderRef +import org.ekstep.analytics.framework.dispatcher.KafkaDispatcher import org.joda.time.DateTime object JobLogger { + implicit val fc = new FrameworkContext(); + def init(jobName: String) = { System.setProperty("logFilename", jobName.toLowerCase()); val ctx = LogManager.getContext(false).asInstanceOf[LoggerContext]; @@ -29,31 +32,35 @@ object JobLogger { } private def info(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) { - logger(name).info(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "INFO", msg, data, None, pdata_id, pdata_pid))); + val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "INFO", msg, data, None, pdata_id, pdata_pid)) + logEvent(event, name, INFO) } private def debug(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) { - logger(name).debug(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "DEBUG", msg, data, None, pdata_id, pdata_pid))) + val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "DEBUG", msg, data, None, pdata_id, pdata_pid)) + logger(name).debug(event); } private def error(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) { - logger(name).error(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "ERROR", msg, data, None, pdata_id, pdata_pid))); + val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "ERROR", msg, data, None, pdata_id, pdata_pid)) + logEvent(event, name, ERROR) } private def warn(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) { - logger(name).debug(JSONUtils.serialize(getV3JobEvent("JOB_LOG", "WARN", msg, data, None, pdata_id, pdata_pid))) + val event = JSONUtils.serialize(getV3JobEvent("JOB_LOG", "WARN", msg, data, None, pdata_id, pdata_pid)) + logger(name).debug(event); } def start(msg: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) = { val event = JSONUtils.serialize(getV3JobEvent("JOB_START", "INFO", msg, data, None, pdata_id, pdata_pid)); EventBusUtil.dipatchEvent(event); - logger(name).info(event); + logEvent(event, name, INFO) } def end(msg: String, status: String, data: Option[AnyRef] = None, name: String = "org.ekstep.analytics", pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String) = { val event = JSONUtils.serialize(getV3JobEvent("JOB_END", "INFO", msg, data, Option(status), pdata_id, pdata_pid)); EventBusUtil.dipatchEvent(event); - 
logger(name).info(event); + logEvent(event, name, INFO) } def log(msg: String, data: Option[AnyRef] = None, logLevel: Level = DEBUG, name: String = "org.ekstep.analytics")(implicit className: String) = { @@ -69,6 +76,26 @@ object JobLogger { } } + def logEvent(event: String, name: String = "org.ekstep.analytics", logLevel: Level = DEBUG) = { + if (StringUtils.equalsIgnoreCase(AppConf.getConfig("log.appender.kafka.enable"), "true")) { + val brokerList = AppConf.getConfig("log.appender.kafka.broker_host") + val topic = AppConf.getConfig("log.appender.kafka.topic") + KafkaDispatcher.dispatch(Array(event), Map("brokerList" -> brokerList, "topic" -> topic)) + } + else { + logLevel match { + case INFO => + logger(name).info(event); + case DEBUG => + logger(name).debug(event); + case WARN => + logger(name).debug(event); + case ERROR => + logger(name).error(event); + } + } + } + private def getV3JobEvent(eid: String, level: String, msg: String, data: Option[AnyRef], status: Option[String] = None, pdata_id: String = "AnalyticsDataPipeline", pdata_pid: String = JobContext.jobName)(implicit className: String): V3DerivedEvent = { val measures = Map( "class" -> className, diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala index 7ff21e89..8f294f15 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/RestUtil.scala @@ -8,7 +8,7 @@ import org.ekstep.analytics.framework.Level._ import scala.io.Source trait HTTPClient { - def get[T](apiURL: String)(implicit mf: Manifest[T]): T + def get[T](apiURL: String, requestHeaders: Option[Map[String, String]] = None)(implicit mf: Manifest[T]): T def post[T](apiURL: String, body: String, requestHeaders: Option[Map[String, String]] = None)(implicit mf: Manifest[T]): T def patch[T](apiURL: String, body: String, headers: Option[Map[String,String]] = None)(implicit mf: Manifest[T]): T def put[T](apiURL:String, body:String,headers:Option[Map[String,String]] = None)(implicit mf:Manifest[T]):T @@ -41,9 +41,12 @@ object RestUtil extends HTTPClient{ } } - def get[T](apiURL: String)(implicit mf: Manifest[T]) = { + def get[T](apiURL: String, headers: Option[Map[String,String]] = None)(implicit mf: Manifest[T]) = { val request = new HttpGet(apiURL); request.addHeader("user-id", "analytics"); + headers.getOrElse(Map()).foreach { header => + request.addHeader(header._1, header._2) + } try { _call(request.asInstanceOf[HttpRequestBase]); } catch { diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala new file mode 100644 index 00000000..6746da4e --- /dev/null +++ b/analytics-core/src/main/scala/org/ekstep/analytics/framework/util/ResultAccumulator.scala @@ -0,0 +1,56 @@ +package org.ekstep.analytics.framework.util + +import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler} +import akka.stream.{Attributes, FlowShape, Inlet, Outlet} +import org.ekstep.analytics.framework.conf.AppConf + +import scala.collection.immutable + +final class ResultAccumulator[E] extends GraphStage[FlowShape[E, immutable.Seq[E]]] { + + val in = Inlet[E]("ResultAccumulator.in") + val out = Outlet[immutable.Seq[E]]("ResultAccumulator.out") + + override def shape = FlowShape.of(in, out) + + override def 
createLogic(attributes: Attributes) = new GraphStageLogic(shape) { + + private var counter: Int = 0 + private val buffer = Vector.newBuilder[E] + + setHandlers(in, out, new InHandler with OutHandler { + + override def onPush(): Unit = { + val nextElement = grab(in) + counter += 1 + + if (counter < AppConf.getConfig("druid.query.batch.buffer").toLong) { + buffer += nextElement + pull(in) + } else { + val result = buffer.result().toList + buffer.clear() + buffer += nextElement + counter = 0 + push(out, result) + } + } + + override def onPull(): Unit = { + pull(in) + } + + override def onUpstreamFinish(): Unit = { + val result = buffer.result().toList + if (result.nonEmpty) { + emit(out, result) + } + completeStage() + } + }) + + override def postStop(): Unit = { + buffer.clear() + } + } +} \ No newline at end of file diff --git a/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala b/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala index a9b4d164..a67c0baa 100644 --- a/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala +++ b/analytics-core/src/main/scala/org/ekstep/analytics/streaming/KafkaEventProducer.scala @@ -24,14 +24,17 @@ object KafkaEventProducer { implicit val className: String = "KafkaEventProducer"; - def init(brokerList: String): KafkaProducer[String, String] = { + def init(brokerList: String, batchSize: Integer, lingerMs: Integer): KafkaProducer[String, String] = { // Zookeeper connection properties val props = new HashMap[String, Object]() - props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 3000L.asInstanceOf[Long]); + props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize); + props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 60000.asInstanceOf[Integer]); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy") + props.put(ProducerConfig.LINGER_MS_CONFIG, lingerMs) new KafkaProducer[String, String](props); } @@ -40,15 +43,15 @@ object KafkaEventProducer { producer.close(); } - def sendEvent(event: AnyRef, topic: String, brokerList: String) = { - val producer = init(brokerList); + def sendEvent(event: AnyRef, topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = { + val producer = init(brokerList, batchSize, lingerMs); val message = new ProducerRecord[String, String](topic, null, JSONUtils.serialize(event)); producer.send(message); close(producer); } - def sendEvents(events: Buffer[AnyRef], topic: String, brokerList: String) = { - val producer = init(brokerList); + def sendEvents(events: Buffer[AnyRef], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = { + val producer = init(brokerList, batchSize, lingerMs); events.foreach { event => { val message = new ProducerRecord[String, String](topic, null, JSONUtils.serialize(event)); @@ -59,8 +62,8 @@ object KafkaEventProducer { } @throws(classOf[DispatcherException]) - def sendEvents(events: Array[String], topic: String, brokerList: String) = { - val producer = init(brokerList); + def sendEvents(events: Array[String], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = { + val producer = init(brokerList, batchSize, lingerMs); events.foreach { event => { val message = new 
ProducerRecord[String, String](topic, event); @@ -70,8 +73,8 @@ object KafkaEventProducer { close(producer); } - def publishEvents(events: Buffer[String], topic: String, brokerList: String) = { - val producer = init(brokerList); + def publishEvents(events: Buffer[String], topic: String, brokerList: String, batchSize: Integer, lingerMs: Integer) = { + val producer = init(brokerList, batchSize, lingerMs); events.foreach { event => { val message = new ProducerRecord[String, String](topic, null, event); diff --git a/analytics-core/src/test/resources/application.conf b/analytics-core/src/test/resources/application.conf index c97bf37c..b45684b3 100644 --- a/analytics-core/src/test/resources/application.conf +++ b/analytics-core/src/test/resources/application.conf @@ -26,13 +26,34 @@ druid = { datasource = "summary-events" response-parsing-timeout = 300000 } -druid.query.wait.time.mins=1 +druid.rollup.host="localhost" +druid.rollup.port=8082 +druid.query.wait.time.mins=5 druid.report.upload.wait.time.mins=1 - +druid.scan.batch.size=100 +druid.scan.batch.bytes=2000000 +druid.query.batch.buffer=10 spark.memory_fraction=0.3 spark.storage_fraction=0.5 spark.driver_memory=1g +druid.latestindex.query="select segment.start, segment.end from druid_segments segment where datasource = 'content-model-snapshot' and used='t' order by start" //postgres configuration postgres.user="postgres" postgres.pass="postgres" + +postgres.druid.db="postgres" +postgres.druid.url="jdbc:postgresql://localhost:65124/" +postgres.druid.user="postgres" +postgres.druid.pass="postgres" + +azure_storage_key = azure-test-key +azure_storage_secret = azure-test-secret + +aws_storage_key = aws-test-key +aws_storage_secret = aws-test-secret + +# Joblog Kafka appender config for cluster execution +log.appender.kafka.enable="false" +log.appender.kafka.broker_host="localhost:9092" +log.appender.kafka.topic="telemetry.log" \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala index 86f12b42..ecd0a14a 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFetcher.scala @@ -5,6 +5,8 @@ import org.ekstep.analytics.framework.util.JSONUtils import org.scalamock.scalatest.MockFactory import org.scalatest.Matchers import org.sunbird.cloud.storage.BaseStorageService +import org.ekstep.analytics.framework.fetcher.S3DataFetcher +import org.ekstep.analytics.framework.fetcher.AzureDataFetcher /** * @author Santhosh @@ -26,6 +28,7 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory { it should "fetch the events from local file" in { implicit val fc = new FrameworkContext(); + fc.inputEventsCount = sc.longAccumulator("Count"); val search = Fetcher("local", None, Option(Array( Query(None, None, None, None, None, None, None, None, None, Option("src/test/resources/sample_telemetry.log")) ))); @@ -43,12 +46,19 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory { ))); val rdd1 = DataFetcher.fetchBatchData[TestDataFetcher](search1); rdd1.count should be (0) + + val search2 = Fetcher("local", None, Option(Array( + Query(None, None, None, None, None, None, None, None, None, None) + ))); + val rdd2 = DataFetcher.fetchBatchData[TestDataFetcher](search2); + rdd2.count should be (0) } it should "fetch no file from S3 and return an empty RDD" in { implicit
val mockFc = mock[FrameworkContext]; val mockStorageService = mock[BaseStorageService] + mockFc.inputEventsCount = sc.longAccumulator("Count"); (mockFc.getStorageService(_:String):BaseStorageService).expects("aws").returns(mockStorageService); (mockStorageService.searchObjects _).expects("dev-data-store", "abc/", Option("2012-01-01"), Option("2012-02-01"), None, "yyyy-MM-dd").returns(null); (mockStorageService.getPaths _).expects("dev-data-store", null).returns(List("src/test/resources/sample_telemetry_2.log")) @@ -78,8 +88,9 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory { it should "fetch the batch events from azure" in { implicit val mockFc = mock[FrameworkContext]; + mockFc.inputEventsCount = sc.longAccumulator("Count"); val mockStorageService = mock[BaseStorageService] - (mockFc.getStorageService(_:String):BaseStorageService).expects("azure").returns(mockStorageService); + (mockFc.getStorageService(_:String, _:String, _:String):BaseStorageService).expects("azure", "azure_storage_key", "azure_storage_secret").returns(mockStorageService); (mockStorageService.searchObjects _).expects("dev-data-store", "raw/", Option("2017-08-31"), Option("2017-08-31"), None, "yyyy-MM-dd").returns(null); (mockStorageService.getPaths _).expects("dev-data-store", null).returns(List("src/test/resources/sample_telemetry_2.log")) val queries = Option(Array( @@ -92,7 +103,7 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory { it should "invoke the druid data fetcher" in { implicit val fc = new FrameworkContext(); - val unknownQuery = DruidQueryModel("scan", "telemetry-events", "LastWeek", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END")))))) + val unknownQuery = DruidQueryModel("time", "telemetry-events", "LastWeek", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END")))))) the[DataFetcherException] thrownBy { DataFetcher.fetchBatchData[TimeSeriesData](Fetcher("druid", None, None, Option(unknownQuery))); } should have message "Unknown druid query type found" @@ -104,4 +115,51 @@ class TestDataFetcher extends SparkSpec with Matchers with MockFactory { val rdd = DataFetcher.fetchBatchData[Event](Fetcher("none", None, None)); rdd.isEmpty() should be (true) } + + it should "cover the missing branches in S3DataFetcher, AzureDataFetcher and DruidDataFetcher" in { + implicit val fc = new FrameworkContext(); + var query = JSONUtils.deserialize[Query]("""{"bucket":"test-container","prefix":"test/","folder":"true","endDate":"2020-01-10"}""") + S3DataFetcher.getObjectKeys(Array(query)).head should be ("s3n://test-container/test/2020-01-10") + AzureDataFetcher.getObjectKeys(Array(query)).head should be ("wasb://test-container@azure-test-key.blob.core.windows.net/test/2020-01-10") + + query = JSONUtils.deserialize[Query]("""{"bucket":"test-container","prefix":"test/","folder":"true","endDate":"2020-01-10","excludePrefix":"test"}""") + S3DataFetcher.getObjectKeys(Array(query)).size should be (0) + AzureDataFetcher.getObjectKeys(Array(query)).size should be (0) + + } + + + it should "check for getFilteredKeys from azure via partitions" in { + + // with single partition + val query1 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-10"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0))) + val keys1 = DataFetcher.getFilteredKeys(query1, 
Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-1-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0))) + keys1.length should be (2) + keys1.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz") + + // with mutilple partition + val query2 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-11"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0,1))) + val keys2 = DataFetcher.getFilteredKeys(query2, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0,1))) + keys2.length should be (2) + keys2.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz") + + // without partition + val query3 = Query(Option("dev-data-store"), Option("raw/"), Option("2020-06-11"), Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, None) + val keys3 = DataFetcher.getFilteredKeys(query3, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), None) + keys3.length should be (2) + keys3.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz") + + // without only end date + val query4 = Query(Option("dev-data-store"), Option("raw/"), None, Option("2020-06-11"), None, None, None, None, None, None, None, None, None, None, Option(List(0,1))) + val keys4 = DataFetcher.getFilteredKeys(query4, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0,1))) + keys4.length should be (2) + keys4.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz") + + // without only end date and delta + val query5 = Query(Option("dev-data-store"), Option("raw/"), None, Option("2020-06-11"), Option(1), None, None, None, None, None, None, None, None, None, Option(List(0))) + val keys5 = DataFetcher.getFilteredKeys(query5, Array("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-1-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-0-1591845501666.json.gz", "https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-11-1-1591845501666.json.gz"), Option(List(0))) + keys5.length should be (2) + keys5.head should be ("https://sunbirddevprivate.blob.core.windows.net/dev-data-store/raw/2020-06-10-0-1591845501666.json.gz") + } + } \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala 
b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala index 7e4347f8..d6524acc 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestDataFilter.scala @@ -36,32 +36,14 @@ class TestDataFilter extends SparkSpec { val filteredEvents = DataFilter.filterAndSort[Event](events, filters, None); filteredEvents.count() should be (20); filteredEvents.first().eid should be("GE_GENIE_START") + + DataFilter.filter[Event, String](events, "GE_GENIE_START", (event: Event, id: String) => { + id.equals(event.eid) + }).count() should be (20); + + DataFilter.filter[Event, String](events, "GE_GENIE_START", null).count() should be (7437); } - it should "filter the events where game id equals org.ekstep.aser" in { - val filters = Option(Array[Filter]( - Filter("gameId", "EQ", Option("org.ekstep.aser")) - )); - val filteredEvents = DataFilter.filterAndSort(events, filters, None); - filteredEvents.count() should be (6276); - filteredEvents.first().gdata.id should be("genie.android") - } - - it should "filter the events where game id not equals org.ekstep.aser" in { - val filters = Option(Array[Filter]( - Filter("gameId", "NE", Option("org.ekstep.aser")) - )); - val filteredEvents = DataFilter.filterAndSort(events, filters, None); - filteredEvents.count() should be (1161); - } - - it should "filter the events by game version" in { - val filters = Option(Array[Filter]( - Filter("gameVersion", "EQ", Option("3.0.26")) - )); - val filteredEvents = DataFilter.filterAndSort(events, filters, None); - filteredEvents.count() should be (1413); - } it should "filter by custom key using bean property matching " in { val filters = Option(Array[Filter]( @@ -138,7 +120,7 @@ class TestDataFilter extends SparkSpec { it should "filter by two criteria" in { val filters = Option(Array[Filter]( Filter("eventId", "IN", Option(List("OE_ASSESS", "OE_LEVEL_SET"))), - Filter("gameId", "EQ", Option("org.ekstep.aser")) + Filter("gdata.id", "EQ", Option("org.ekstep.aser")) )); val filteredEvents = DataFilter.filterAndSort(events, filters, None); filteredEvents.count() should be (1872); @@ -247,14 +229,6 @@ class TestDataFilter extends SparkSpec { result1(0).id should be ("Two"); } - it should "filter by genie tag" in { - val filteredEvents = DataFilter.filter(events, Filter("genieTag", "IN", Option(List("e4d7a0063b665b7a718e8f7e4014e59e28642f8c")))); - filteredEvents.count() should be (3); - - val filteredEvents2 = DataFilter.filter(events, Filter("genieTag", "IN", Option(List("e4d7a0063b665b7a718e8f7e4014e59e28642f9c")))); - filteredEvents2.count() should be (2); - } - it should "filter events using range" in { val date = CommonUtil.dateFormat.parseDateTime("2015-09-23"); @@ -282,7 +256,7 @@ class TestDataFilter extends SparkSpec { val date = new DateTime() val filters: Array[Filter] = Array( Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> date.getMillis))), - Filter("genieTag", "IN", Option(""))) + Filter("tags", "IN", Option(""))) DataFilter.matches(inputEvent.first(), filters) should be(false) DataFilter.matches(inputEvent.first(), Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> date.getMillis)))) should be(true) DataFilter.matches(inputEvent.first(), Array[Filter]()) should be(true) @@ -304,6 +278,27 @@ class TestDataFilter extends SparkSpec { ); val filteredEvents = DataFilter.filter(rddData, filters); filteredEvents.count() should be (0); + + 
DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("intCol", "LT", Option(3.asInstanceOf[AnyRef]))) should be (false) + DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("longCol", "LT", Option(9L.asInstanceOf[AnyRef]))) should be (false) + DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 3.0, sdf.parse("2019-11-11")), Filter("doubleCol", "LT", Option(2.0.asInstanceOf[AnyRef]))) should be (false) + DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "LT", Option("2019-11-10".asInstanceOf[AnyRef]))) should be (false) + DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "RANGE", Option(Map("start" -> "2019-11-10", "end" -> "2019-11-14")))) should be (true) + DataFilter.matches[TestLessThan](TestLessThan("0", 4, 10L, 1.0, sdf.parse("2019-11-11")), Filter("dateCol", "RANGE", Option(Map("start" -> "2019-11-07", "end" -> "2019-11-09")))) should be (false) + } + + it should "cover all uncovered branches" in { + DataFilter.matches[MeasuredEvent](MeasuredEvent(null, 0l, 123l, null, null, null, null, None, None, null, null, null), Filter("eventts", "RANGE", Option(Map("start" -> 0L, "end" -> 124l)))) should be (true) + + case class Event1(val eid: String, val ts: String, val ets: Long, val `@timestamp`: String) + DataFilter.matches[Event1](Event1(null, "", 123l, "2016-01-02T00:59:22.924Z"), Filter("eventts", "EQ", Option(1451696362924l.asInstanceOf[AnyRef]))) should be (true) + + @scala.beans.BeanInfo + case class Event2(eid: String, tags: List[String]) + DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("tags", "IN", Option(List("tag2")))) should be (true) + DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("tags", "NIN", Option(List("tag2")))) should be (false) + DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("eid", "NE", Option("Test"))) should be (false) + DataFilter.matches[Event2](Event2("Test", List("tag1", "tag2", "tag3")), Filter("eid", "NE", None)) should be (true) } } \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala new file mode 100644 index 00000000..b13ddf96 --- /dev/null +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestFrameworkContext.scala @@ -0,0 +1,53 @@ +package org.ekstep.analytics.framework + +import java.text.SimpleDateFormat + +import org.scalatest._ +import org.apache.spark.rdd.RDD +import org.ekstep.analytics.framework.util.CommonUtil +import org.apache.spark.SparkContext +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods +import com.fasterxml.jackson.core.JsonParseException +import org.ekstep.analytics.framework.exception.DataFilterException +import org.apache.spark.SparkException +import org.ekstep.analytics.framework.util.JSONUtils + +import scala.collection.mutable.Buffer +import java.util.Date + +import org.joda.time.DateTime + + +/** + * @author Santhosh + */ +class TestFrameworkContext extends BaseSpec with BeforeAndAfterAll { + + "FrameworkContext" should "test all methods" in { + + val fc = new FrameworkContext(); + + noException should be thrownBy { + fc.shutdownStorageService(); + } + + fc.initialize(Option(Array(("azure", "local", "local")))); + 
fc.getStorageService("azure", "local", "local") should not be (null) + + fc.storageContainers.clear(); + fc.getStorageService("azure") should not be (null) + + fc.setDruidClient(null, null); + noException should be thrownBy { + fc.shutdownDruidClient(); + } + + fc.getDruidClient() should not be (null); + fc.setDruidClient(fc.getDruidClient(), fc.getDruidRollUpClient()) + + fc.getAkkaHttpUtil() should not be (null) + fc.closeContext(); + } + +} \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala index 67de3b27..2536671e 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/TestOutputDispatcher.scala @@ -9,6 +9,9 @@ import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException import org.scalamock.scalatest.MockFactory import org.scalatest.Matchers import org.sunbird.cloud.storage.BaseStorageService +import org.ekstep.analytics.framework.dispatcher.S3Dispatcher +import org.apache.hadoop.fs.azure.AzureException +import org.ekstep.analytics.framework.dispatcher.ConsoleDispatcher /** * @author Santhosh @@ -30,38 +33,6 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr OutputDispatcher.dispatch(Dispatcher("console", Map()), sc.parallelize(events.take(1))); } - val eventsInArray = events.map { x => JSONUtils.serialize(x) }.collect - noException should be thrownBy { - OutputDispatcher.dispatch(Dispatcher("console", Map()), eventsInArray); - } - } - - it should "dispatch output to s3" in { - - implicit val mockFc = mock[FrameworkContext]; - val mockStorageService = mock[BaseStorageService] - (mockFc.getStorageService(_: String): BaseStorageService).expects("aws").returns(mockStorageService).anyNumberOfTimes(); - (mockStorageService.upload _).expects("dev-data-store", *, *, Option(false), None, None, None).returns(null).anyNumberOfTimes(); - (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes() - val output1 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log", "zip" -> true.asInstanceOf[AnyRef])); - val output2 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log.gz")); - val output3 = Dispatcher("s3file", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log2.json")); - noException should be thrownBy { - OutputDispatcher.dispatch(output1, events); - OutputDispatcher.dispatch(output2, events); - OutputDispatcher.dispatch(output3, events); - } - - val output4 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log", "zip" -> true.asInstanceOf[AnyRef])); - val output5 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log1.json", "filePath" -> "src/test/resources/sample_telemetry.log.gz")); - val output6 = Dispatcher("s3", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> "output/test-log2.json")); - val eventRDDString = events.map(f => JSONUtils.serialize(f)).collect(); - //noException should be thrownBy { - OutputDispatcher.dispatch(output4, eventRDDString); - 
OutputDispatcher.dispatch(output5, eventRDDString); - OutputDispatcher.dispatch(output6, eventRDDString); - //} - } it should "throw dispatcher exceptions" in { @@ -103,20 +74,32 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr // Invoke S3 dispatcher without required fields ('bucket','key') a[DispatcherException] should be thrownBy { - OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("zip" -> true.asInstanceOf[AnyRef])), events); - OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("bucket" -> Option("test"))), events); - OutputDispatcher.dispatch(Dispatcher("s3File", Map[String, AnyRef]("zip" -> true.asInstanceOf[AnyRef])), events); - OutputDispatcher.dispatch(Dispatcher("s3File", Map[String, AnyRef]("bucket" -> Option("test"))), events); + OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("key" -> "testKey")), events); } - - // Invoke dispatch with null dispatcher + a[DispatcherException] should be thrownBy { - OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], events); + OutputDispatcher.dispatch(Dispatcher("s3", Map[String, AnyRef]("bucket" -> "testBucket")), events); + } + + a[DispatcherException] should be thrownBy { + OutputDispatcher.dispatch(StorageConfig("s3", null, null), events); + } + + a[DispatcherException] should be thrownBy { + OutputDispatcher.dispatch(StorageConfig("file", "test", null), events); + } + + a[DispatcherException] should be thrownBy { + OutputDispatcher.dispatch(null.asInstanceOf[StorageConfig], events); + } + + a[DispatcherException] should be thrownBy { + ConsoleDispatcher.dispatch(events.map(f => JSONUtils.serialize(f)), StorageConfig("file", "test", null)); } - val eventsInArray = events.map { x => JSONUtils.serialize(x) }.collect + // Invoke dispatch with null dispatcher a[DispatcherException] should be thrownBy { - OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], eventsInArray); + OutputDispatcher.dispatch(null.asInstanceOf[Dispatcher], events); } // Invoke dispatch with None dispatchers @@ -136,7 +119,6 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr OutputDispatcher.dispatch(Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))), noEvents); } - OutputDispatcher.dispatch(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])), Array[String]()); } it should "execute test cases related to script dispatcher" in { @@ -155,52 +137,57 @@ class TestOutputDispatcher extends SparkSpec("src/test/resources/sample_telemetr val f = new File("src/test/resources/test_output.log"); f.exists() should be(true) CommonUtil.deleteFile("src/test/resources/test_output.log"); + + OutputDispatcher.dispatch(StorageConfig("local", null, "src/test/resources/test_output.log"), events); + val f2 = new File("src/test/resources/test_output.log"); + f2.exists() should be(true) + CommonUtil.deleteFile("src/test/resources/test_output.log"); } + + it should "give DispatcherException if azure config is missing " in { - it should "dispatch output to azure" in { - - implicit val mockFc = mock[FrameworkContext]; - val mockStorageService = mock[BaseStorageService] - (mockFc.getStorageService(_: String): BaseStorageService).expects("azure").returns(mockStorageService).anyNumberOfTimes(); - (mockStorageService.upload _).expects("dev-data-store", *, *, Option(false), None, None, None).returns(null).anyNumberOfTimes(); - (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes() - val date = 
System.currentTimeMillis() - val output1 = Dispatcher("azure", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-dispatcher1-$date.json", "zip" -> true.asInstanceOf[AnyRef])); - val output2 = Dispatcher("azure", Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-dispatcher2-$date.json", "filePath" -> "src/test/resources/sample_telemetry.log")); - val strData = events.map(f => JSONUtils.serialize(f)) - - noException should be thrownBy { - OutputDispatcher.dispatch(output2, strData.collect()); - } + implicit val fc = new FrameworkContext(); + val eventArr = events.map(f => JSONUtils.serialize(f)).cache(); + + the[DispatcherException] thrownBy { + AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> "output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), eventArr); + } should have message "'bucket' & 'key' parameters are required to send output to azure" + + the[DispatcherException] thrownBy { + AzureDispatcher.dispatch(Map[String, AnyRef]("bucket" -> "test-bucket", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), eventArr); + } should have message "'bucket' & 'key' parameters are required to send output to azure" + + the[DispatcherException] thrownBy { + OutputDispatcher.dispatch(StorageConfig("azure", "test-bucket", null), eventArr); + } should have message "'bucket' & 'key' parameters are required to send output to azure" + + the[DispatcherException] thrownBy { + OutputDispatcher.dispatch(StorageConfig("azure", null, "output/test-directory/"), eventArr); + } should have message "'bucket' & 'key' parameters are required to send output to azure" } - it should "dispatch directory to azure" in { - implicit val mockFc = mock[FrameworkContext]; - val mockStorageService = mock[BaseStorageService] - (mockFc.getStorageService(_: String): BaseStorageService).expects("azure").returns(mockStorageService).anyNumberOfTimes(); - (mockStorageService.upload _).expects("dev-data-store", *, *, Option(true), *, Option(3), *).returns("").anyNumberOfTimes(); - (mockStorageService.closeContext _).expects().returns().anyNumberOfTimes() - //noException should be thrownBy { - AzureDispatcher.dispatchDirectory(Map[String, AnyRef]("bucket" -> "dev-data-store", "key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/")); - //} - } - - it should "give DispatcherException if azure config is missing " in { + it should "dispatch output to S3/Azure" in { implicit val fc = new FrameworkContext(); - the[DispatcherException] thrownBy { - AzureDispatcher.dispatchDirectory(Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/")); - } should have message "'local file path', 'bucket' & 'key' parameters are required to upload directory to azure" - the[DispatcherException] thrownBy { - AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/"), events.map(f => JSONUtils.serialize(f))); - } should have message "'bucket' & 'key' parameters are required to send output to azure" + a[AzureException] should be thrownBy { + AzureDispatcher.dispatch(Map[String, AnyRef]("key" -> "test_key", "bucket" -> "test_bucket"), events.map(f => JSONUtils.serialize(f))); + } + + a[AzureException] should be thrownBy { + OutputDispatcher.dispatch(StorageConfig("azure", "test_bucket", "test_key", Option("azure_storage_key")), events.map(f => JSONUtils.serialize(f))); + } + + a[AzureException] should be thrownBy { + 
OutputDispatcher.dispatch(StorageConfig("azure", "test_bucket", "test_key"), events.map(f => JSONUtils.serialize(f))); + } + + a[IllegalArgumentException] should be thrownBy { + S3Dispatcher.dispatch(Map[String, AnyRef]("key" -> "test_key", "bucket" -> "test_bucket"), events.map(f => JSONUtils.serialize(f))); + } - the[DispatcherException] thrownBy { - AzureDispatcher.dispatch(events.map(f => JSONUtils.serialize(f)).collect(), Map[String, AnyRef]("key" -> s"output/test-directory/", "dirPath" -> "src/test/resources/1234/OE_INTERACT/")); - } should have message "'bucket' & 'key' parameters are required to send output to azure" } it should "dispatch output to elastic-search" in { diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala index 76b81856..0bcac88d 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/dispatcher/TestSlackDispatcher.scala @@ -23,6 +23,10 @@ class TestSlackDispatcher extends SparkSpec { the[DispatcherException] thrownBy { SlackDispatcher.dispatch(Map("channel" -> "testing"), sc.parallelize(List("test"))); } should have message "'channel' & 'userName' parameters are required to send output to slack" + + the[DispatcherException] thrownBy { + SlackDispatcher.dispatch(Map("userName" -> "testing"), sc.parallelize(List("test"))); + } should have message "'channel' & 'userName' parameters are required to send output to slack" } } diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala index 3ab30360..7ecb557b 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/factory/TestDispatcherFactory.scala @@ -7,11 +7,11 @@ class TestDispatcherFactory extends BaseSpec { it should "return a Model class for a model code" in { - val dispatcherList = List(Dispatcher("s3file", Map()), Dispatcher("s3", Map()), Dispatcher("kafka", Map()), Dispatcher("script", Map()), + val dispatcherList = List(Dispatcher("s3", Map()), Dispatcher("kafka", Map()), Dispatcher("script", Map()), Dispatcher("console", Map()), Dispatcher("file", Map()), Dispatcher("azure", Map()), Dispatcher("slack", Map()), Dispatcher("elasticsearch", Map())) val dispatchers = dispatcherList.map { f => DispatcherFactory.getDispatcher(f) } - dispatchers(1) should be(S3Dispatcher) + dispatchers(0) should be(S3Dispatcher) } } diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala index 59eeedde..58806243 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/fetcher/TestDruidDataFetcher.scala @@ -2,6 +2,10 @@ package org.ekstep.analytics.framework.fetcher import java.time.{ZoneOffset, ZonedDateTime} +import akka.actor.ActorSystem +import akka.http.scaladsl.model._ +import akka.stream.scaladsl.Source +import akka.util.ByteString import cats.syntax.either._ import ing.wbaa.druid._ import ing.wbaa.druid.client.DruidClient 
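The TestDruidDataFetcher additions that follow all use the same recipe: build a DruidQueryModel, translate it with DruidDataFetcher.getDruidQuery, stub DruidClient.doQuery with a canned circe Json payload, and assert on the rows that DruidDataFetcher.getDruidData returns. The sketch below condenses that recipe; it assumes it runs inside a SparkSpec with Matchers and MockFactory (as the tests do) and is illustrative only, not an excerpt from the patch.

    // Build a query model and derive the native scruid query from it.
    val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"),
      Option(List(Aggregation(Option("count"), "count", ""))),
      Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), None, None, None)
    val druidQuery = DruidDataFetcher.getDruidQuery(query)

    // Canned response: a single circe Json row wrapped in the scruid response types used by the tests.
    val doc: Json = parse("""{"count": 5, "producer_id": "dev.sunbird.portal"}""").getOrElse(Json.Null)
    val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc))
    val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy)

    // Stub the Druid client so no cluster is needed, then collect and check the serialized rows.
    implicit val mockFc = mock[FrameworkContext]
    implicit val druidConfig = mock[DruidConfig]
    val mockDruidClient = mock[DruidClient]
    (mockDruidClient.doQuery[DruidResponse](_: DruidQuery)(_: DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse))
    (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient)

    val druidResult = DruidDataFetcher.getDruidData(query).collect()
    druidResult.size should be (1)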
@@ -9,13 +13,37 @@ import ing.wbaa.druid.definitions.{AggregationType, PostAggregationType} import io.circe._ import io.circe.parser._ import org.ekstep.analytics.framework._ +import org.ekstep.analytics.framework.util.{CommonUtil, EmbeddedPostgresqlService, HTTPClient, JSONUtils} import org.scalamock.scalatest.MockFactory import org.scalatest.Matchers +import org.joda.time.DateTimeUtils +import org.sunbird.cloud.storage.conf.AppConf import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future + + class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { + override def beforeAll () { + super.beforeAll() + EmbeddedPostgresqlService.start() + EmbeddedPostgresqlService.createNominationTable() + } + + it should "check for getDimensionByType methods" in { + val defaultExpr = DruidDataFetcher.getDimensionByType(None, "field", Option("field1")) + defaultExpr.toString should be ("Dim(field,Some(field1),None,None)") + + val javascriptExtractionExpr = DruidDataFetcher.getDimensionByType(Option("extraction"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("javascript", "function(x) { return x + 10; }")))) + javascriptExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(JavascriptExtractionFn(function(x) { return x + 10; },Some(false))))") + + val lookupExtractionExpr = DruidDataFetcher.getDimensionByType(Option("extraction"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("registeredlookup", "channel")))) + lookupExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(RegisteredLookupExtractionFn(channel,Some(false),None)))") + + val cascadeExtractionExpr = DruidDataFetcher.getDimensionByType(Option("cascade"), "field", Option("field1"), Option("String"), Option(List(ExtractFn("registeredlookup", "channel"),ExtractFn("javascript", "function(x) { return x + 10; }")))) + cascadeExtractionExpr.toString should be ("Dim(field,Some(field1),Some(String),Some(CascadeExtractionFn(List(RegisteredLookupExtractionFn(channel,Some(false),None), JavascriptExtractionFn(function(x) { return x + 10; },Some(false))))))") + } it should "check for getAggregationTypes methods" in { @@ -69,6 +97,24 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { val javascriptExprWithoutName = DruidDataFetcher.getAggregationByType(AggregationType.Javascript, None, "field", Option("function(current, edata_size) { return current + (edata_size == 0 ? 
1 : 0); }"), Option("function(partialA, partialB) { return partialA + partialB; }"), Option("function () { return 0; }")) + + a[Exception] should be thrownBy { + DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None) + } + + a[Exception] should be thrownBy { + DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum")) + } + + a[Exception] should be thrownBy { + DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum"), Option("edata_size")) + } + + val filteredExp = DruidDataFetcher.getAggregationByType(AggregationType.Filtered, Option("Last"), "field", None, None, None, None, None, None, Option("longSum"), Option("edata_size"), Option(0.asInstanceOf[AnyRef])) + filteredExp.toString should be("SelectorFilteredAgg(edata_size,Some(0),LongSumAggregation(Last,field),None)") + + DruidDataFetcher.getAggregation(Option(List(Aggregation(Option("count"), "test", "field")))).head.getName should be ("count"); + } it should "check for getFilterTypes methods" in { @@ -97,6 +143,41 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { val greaterThanExpr = DruidDataFetcher.getFilterByType("greaterthan", "field", List(0.asInstanceOf[AnyRef])) val lessThanExpr = DruidDataFetcher.getFilterByType("lessthan", "field", List(1000.asInstanceOf[AnyRef])) + + a[Exception] should be thrownBy { + DruidDataFetcher.getFilterByType("test", "field", List(1000.asInstanceOf[AnyRef])) + } + + DruidDataFetcher.getFilter(None) should be (None) + + DruidDataFetcher.getFilter(Option(List(DruidFilter("in", "eid", None, None)))).get.asFilter.toString() should be ("AndFilter(List(InFilter(eid,List(),None)))") + DruidDataFetcher.getFilter(Option(List(DruidFilter("in", "eid", Option("START"), None)))).get.asFilter.toString() should be ("AndFilter(List(InFilter(eid,List(START),None)))") + } + + it should "check for getGroupByHaving methods" in { + + var filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef]))); + filteringExpr.get.asFilter.toString() should be ("BoundFilter(doubleSum,None,Some(20.0),None,Some(true),Some(Numeric),None)") + + filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("equalTo", "user_id", "user1"))); + filteringExpr.get.asFilter.toString() should be ("SelectFilter(user_id,Some(user1),None)") + + filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("not", "user_id", "user1"))); + filteringExpr.get.asFilter.toString() should be ("NotFilter(SelectFilter(user_id,Some(user1),None))") + + filteringExpr = DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("greaterThan", "doubleSum", 20.asInstanceOf[AnyRef]))); + filteringExpr.get.asFilter.toString() should be ("BoundFilter(doubleSum,Some(20.0),None,Some(true),None,Some(Numeric),None)") + + a[Exception] should be thrownBy { + DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("and", "doubleSum", 20.asInstanceOf[AnyRef]))); + } + + a[Exception] should be thrownBy { + DruidDataFetcher.getGroupByHaving(Option(DruidHavingFilter("in", "doubleSum", 20.asInstanceOf[AnyRef]))); + } + + DruidDataFetcher.getGroupByHaving(None) should be (None); + } it should "check for getPostAggregation methods" in { @@ -113,15 +194,62 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { val 
divisionExpr = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Division", PostAggregationFields("field", ""), "/") divisionExpr.getName.toString should be ("Division") - val javaScriptExpr = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "Percentage", PostAggregationFields("fieldA", "fieldB"), "function(a, b) { return (a / b) * 100; }") + val javaScriptExpr1 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "Percentage", PostAggregationFields("fieldA", "fieldB"), "function(a, b) { return (a / b) * 100; }") + javaScriptExpr1.toString should be ("JavascriptPostAgg(List(fieldA, fieldB),function(a, b) { return (a / b) * 100; },Some(Percentage))") + + val javaScriptExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Javascript, "MultiplyBy100", PostAggregationFields("fieldA", ""), "function(a) { return a * 100; }") + javaScriptExpr2.toString should be ("JavascriptPostAgg(List(fieldA),function(a) { return a * 100; },Some(MultiplyBy100))") + + val additionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Addition", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "+") + additionExpr2.getName.toString should be ("Addition") + + val subtractionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Subtraction", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "-") + subtractionExpr2.getName.toString should be ("Subtraction") + + val multiplicationExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Product", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "*") + multiplicationExpr2.getName.toString should be ("Product") + + val divisionExpr2 = DruidDataFetcher.getPostAggregationByType(PostAggregationType.Arithmetic, "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/") + divisionExpr2.getName.toString should be ("Division") + + a[Exception] should be thrownBy { + DruidDataFetcher.getPostAggregation(Option(List(PostAggregation("longLeast", "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/")))) + } + + a[Exception] should be thrownBy { + DruidDataFetcher.getPostAggregation(Option(List(PostAggregation("test", "Division", PostAggregationFields("field", 1.asInstanceOf[AnyRef], "constant"), "/")))) + } + + DruidDataFetcher.getPostAggregation(None) should be (None); + } - + + it should "test the getDruidQuery method" in { + var query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), None, None, None) + var druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),None,List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,None,None,List(),Map())"); + + query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), None, None, 
None) + druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,None,List(),Map())"); + + query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, None, None, None) + druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),None,Day,false,List(),Map())"); + + DateTimeUtils.setCurrentMillisFixed(1577836800000L); // Setting Jan 1 2020 as current time + query = DruidQueryModel("topN", "telemetry-events", "Last7Days", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), None, None, None, intervalSlider = 2) + druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-12-23T05:30:00+00:00/2019-12-30T05:30:00+00:00),Day,None,List(),Map())"); + DateTimeUtils.setCurrentMillisSystem(); + } + it should "fetch the data from druid using groupBy query type" in { - val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END")))))) + val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) val druidQuery = DruidDataFetcher.getDruidQuery(query) - druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,None,None,List(),Map())") - + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())") + val json: String = """ { "total_scans" : 9007, @@ -129,54 +257,86 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { } """ val doc: Json = 
parse(json).getOrElse(Json.Null); - val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc)); - val druidResponse = DruidResponse.apply(results, QueryType.GroupBy) + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) implicit val mockFc = mock[FrameworkContext]; implicit val druidConfig = mock[DruidConfig]; val mockDruidClient = mock[DruidClient] - (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) - (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient); + (mockDruidClient.doQuery[DruidResponse](_:DruidNativeQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); - val druidResult = DruidDataFetcher.getDruidData(query) + val druidResult = DruidDataFetcher.getDruidData(query).collect druidResult.size should be (1) druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""") } - + it should "fetch the data from druid using timeseries query type" in { - val query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END")))))) + val query = DruidQueryModel("timeSeries", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) val druidQuery = DruidDataFetcher.getDruidQuery(query); - druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),Day,false,List(),Map())"); - - val json: String = """ + druidQuery.toString() should be ("TimeSeriesQuery(List(CountAggregation(count_count)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),Day,false,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())"); + + var json: String = """ { "total_scans" : 9007, "producer_id" : "dev.sunbird.learning.platform" } """ - val doc: Json = parse(json).getOrElse(Json.Null); - val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc)); - val druidResponse = DruidResponse.apply(results, QueryType.Timeseries) + var doc: Json = parse(json).getOrElse(Json.Null); + var results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + var druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries) implicit val mockFc = mock[FrameworkContext]; implicit val druidConfig = mock[DruidConfig]; val mockDruidClient = mock[DruidClient] - (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) - (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient); + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + (mockFc.getDruidClient: () => 
DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes() + + var druidResult = DruidDataFetcher.getDruidData(query).collect - val druidResult = DruidDataFetcher.getDruidData(query) - druidResult.size should be (1) druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""") + + json = """ + { + "total_scans" : null, + "producer_id" : "dev.sunbird.learning.platform" + } + """ + doc = parse(json).getOrElse(Json.Null); + results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries) + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + // (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient) + + druidResult = DruidDataFetcher.getDruidData(query).collect() + druidResult.size should be (1) + druidResult.head should be ("""{"total_scans":"unknown","producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""") + + json = """ + { + "total_scans" : {}, + "producer_id" : "dev.sunbird.learning.platform" + } + """ + doc = parse(json).getOrElse(Json.Null); + results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.Timeseries) + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + // (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient) + + druidResult = DruidDataFetcher.getDruidData(query).collect() + + druidResult.size should be (1) } it should "fetch the data from druid using topN query type" in { - val query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END")))))) + val query = DruidQueryModel("topN", "telemetry-events", "2019-11-01/2019-11-02", Option("day"), Option(List(Aggregation(Option("count"), "count", ""))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) val druidQuery = DruidDataFetcher.getDruidQuery(query); - druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(),Map())") + druidQuery.toString() should be ("TopNQuery(DefaultDimension(context_pdata_id,Some(producer_id),None),100,count,List(CountAggregation(count)),List(2019-11-01/2019-11-02),Day,Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())") val json: String = """ [ @@ -187,23 +347,389 @@ class TestDruidDataFetcher extends SparkSpec with Matchers with MockFactory { { "count" : 1, "producer_id" : "local.sunbird.desktop" + }, + { + "count" : null, + "producer_id" : "local.sunbird.app" + }, + { + "count" : {}, + "producer_id" : "local.sunbird.app" } ] 
""" val doc: Json = parse(json).getOrElse(Json.Null); - val results = List(DruidResult.apply(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC), doc)); - val druidResponse = DruidResponse.apply(results, QueryType.TopN) + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.TopN) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient) + + val druidResult = DruidDataFetcher.getDruidData(query).collect() + + druidResult.size should be (4) + druidResult(0) should be ("""{"date":"2019-11-28","count":5,"producer_id":"dev.sunbird.portal"}""") + druidResult(1) should be ("""{"date":"2019-11-28","count":1,"producer_id":"local.sunbird.desktop"}""") + druidResult(2) should be ("""{"date":"2019-11-28","count":"unknown","producer_id":"local.sunbird.app"}""") + + val druidResponse2 = DruidResponseTimeseriesImpl.apply(List(), QueryType.TopN) + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse2)) + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient) + val druidResult2 = DruidDataFetcher.getDruidData(query).collect() + druidResult2.size should be (0) + + } + it should "fetch the data from druid rollup cluster using groupBy query type" in { + + val query = DruidQueryModel("groupBy", "telemetry-rollup-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())") + + val json: String = """ + { + "total_scans" : 9007, + "producer_id" : "dev.sunbird.learning.platform" + } + """ + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + 
(mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient); + + val druidResult = DruidDataFetcher.getDruidData(query).collect() + + druidResult.size should be (1) + druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""") + } + + it should "fetch data for groupBy dimensions with extraction fn" in { + val qrScans = DruidQueryModel("groupBy", "telemetry-rollup-syncts", "2020-03-01/2020-04-01", Option("all"), Option(List(Aggregation(Option("total_scans"),"longSum", "total_count"))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district"),Option("Extraction"), Option("STRING"), Option(List(ExtractFn("javascript", "function(str){return str == null ? null: str.toLowerCase().trim().split(' ').map(function(t){return t.substring(0,1).toUpperCase()+t.substring(1,t.length)}).join(' ')}")))))), Option(List(DruidFilter("in", "object_type", None, Option(List("qr", "Qr", "DialCode", "dialcode"))), DruidFilter("equals", "eid", Option("SEARCH")), DruidFilter("equals", "derived_loc_state", Option("Andhra Pradesh")), DruidFilter("isnotnull", "derived_loc_district", None)))) + val druidQuery = DruidDataFetcher.getDruidQuery(qrScans) + druidQuery.toString should be ("GroupByQuery(List(LongSumAggregation(total_scans,total_count)),List(2020-03-01/2020-04-01),Some(AndFilter(List(InFilter(object_type,List(qr, Qr, DialCode, dialcode),None), SelectFilter(eid,Some(SEARCH),None), SelectFilter(derived_loc_state,Some(Andhra Pradesh),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), ExtractionDimension(derived_loc_district,Some(district),Some(STRING),JavascriptExtractionFn(function(str){return str == null ? 
null: str.toLowerCase().trim().split(' ').map(function(t){return t.substring(0,1).toUpperCase()+t.substring(1,t.length)}).join(' ')},Some(false)))),All,None,None,List(),Map())") + + + val json = """{"total_scans":7257.0,"district":"Anantapur","state":"Andhra Pradesh","date":"2020-03-01"}""" + + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) implicit val mockFc = mock[FrameworkContext]; implicit val druidConfig = mock[DruidConfig]; val mockDruidClient = mock[DruidClient] - (mockDruidClient.doQuery(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) - (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient); + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + val druidResult = DruidDataFetcher.getDruidData(qrScans).collect() + + druidResult.size should be (1) + druidResult.head should be ("""{"total_scans":7257.0,"district":"Anantapur","state":"Andhra Pradesh","date":"2020-03-01"}""") + } + + "TesthLL" should "fetch data for groupBy dimension with HLLAggregator" in { + val districtMonthly = DruidQueryModel("groupBy", "summary-distinct-counts", "2020-05-12/2020-05-13", Option("all"), Option(List(Aggregation(Option("total_unique_devices"), "HLLSketchMerge", "unique_devices", None, None, None, None, None), Aggregation(None, "HLLSketchMerge", "devices", None, None, None, None, None), Aggregation(Option("Count"), "count", ""))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district")))), Option(List(DruidFilter("in", "dimensions_pdata_id", None, Option(List("prod.diksha.app", "prod.diksha.portal"))), DruidFilter("isnotnull", "derived_loc_district", None)))) + val druidQuery = DruidDataFetcher.getDruidQuery(districtMonthly) + druidQuery.toString should be ("GroupByQuery(List(HLLAggregation(total_unique_devices,unique_devices,12,HLL_4,true), HLLAggregation(hllsketchmerge_devices,devices,12,HLL_4,true), CountAggregation(Count)),List(2020-05-12/2020-05-13),Some(AndFilter(List(InFilter(dimensions_pdata_id,List(prod.diksha.app, prod.diksha.portal),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), DefaultDimension(derived_loc_district,Some(district),None)),All,None,None,List(),Map())") + + val json = """{"state":"Andaman & Nicobar Islands","total_unique_devices":1.0,"Count":9.0,"date":"2020-03-01","district":"Ahmednagar"}""" + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + (mockFc.getDruidClient: () => 
DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(districtMonthly).collect() + druidResult.size should be (1) + druidResult.head should be ("""{"state":"Andaman & Nicobar Islands","total_unique_devices":1.0,"Count":9.0,"date":"2020-03-01","district":"Ahmednagar"}""") + } + + "TestFetcher" should "fetch data for TopN dimension with Lookup" in { + val query = DruidQueryModel("topN", "telemetry-events", "2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00", Option("all"), + Option(List(Aggregation(Option("count"), "count", "count"))), + Option(List(DruidDimension("dialcode_channel", Option("dialcode_slug"), Option("extraction"), None, + Option(List(ExtractFn("registeredlookup", "channel")))))), + Option(List(DruidFilter("equals", "dialcode_channel", Option("012315809814749184151")))), None, None, None,None,None, Option("count")) + + val druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString should be("TopNQuery(ExtractionDimension(dialcode_channel,Some(dialcode_slug),None,RegisteredLookupExtractionFn(channel,Some(false),None)),100,count,List(CountAggregation(count)),List(2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00),All,Some(AndFilter(List(SelectFilter(dialcode_channel,Some(012315809814749184151),None)))),List(),Map())") + + val json = """[{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}]""" + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.TopN) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_: DruidQuery)(_: DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query).collect() + druidResult.size should be (1) + druidResult.head should be ("""{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}""") + } + + it should "fetch data for GroupBy dimension with Lookup and replaceMissingValue as Unknown" in { + val lookupQuery = DruidQueryModel("groupBy", "telemetry-events", "2020-05-08T00:00:00+00:00/2020-05-15T00:00:00+00:00", Option("all"), + Option(List(Aggregation(Option("count"), "count", "count"))), + Option(List(DruidDimension("derived_loc_state", Option("state_slug"), Option("extraction"), None, + Option(List(ExtractFn("registeredlookup", "lookup_state", None, Option("Unknown"))))), DruidDimension("derived_loc_district", Option("district_slug"), Option("extraction"), None, + Option(List(ExtractFn("registeredlookup", "lookup_district", None, Option("Unknown")))))))) + + val query = DruidDataFetcher.getDruidQuery(lookupQuery) + query.toString should be("GroupByQuery(List(CountAggregation(count)),List(2020-05-08T00:00:00+00:00/2020-05-15T00:00:00+00:00),None,List(ExtractionDimension(derived_loc_state,Some(state_slug),None,RegisteredLookupExtractionFn(lookup_state,None,Some(Unknown))), 
ExtractionDimension(derived_loc_district,Some(district_slug),None,RegisteredLookupExtractionFn(lookup_district,None,Some(Unknown)))),All,None,None,List(),Map())") + + val json = """{"district_slug":"Andamans","state_slug":"Andaman & Nicobar Islands","count":138.0,"date":"2020-05-08"}""" + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_: DruidQuery)(_: DruidConfig)).expects(query, *).returns(Future(druidResponse)).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(lookupQuery).collect() + druidResult.size should be (1) + druidResult.head should be ("""{"district_slug":"Andamans","state_slug":"Andaman & Nicobar Islands","count":138.0,"date":"2020-03-01"}""") + } + + it should "fetch data for filtered aggregation" in { + val scansQuery = DruidQueryModel("groupBy", "summary-distinct-counts", "2020-05-12/2020-05-13", Option("all"), Option(List(Aggregation(Option("total_failed_scans"), "filtered", "total_count", None, None, None, None, None, None, Option("longSum"), Option("edata_size"), Option(0.asInstanceOf[AnyRef])))), Option(List(DruidDimension("derived_loc_state", Option("state")), DruidDimension("derived_loc_district", Option("district")))), Option(List(DruidFilter("in", "dimensions_pdata_id", None, Option(List("prod.diksha.app", "prod.diksha.portal"))), DruidFilter("isnotnull", "derived_loc_district", None)))) + val druidQuery = DruidDataFetcher.getDruidQuery(scansQuery) + druidQuery.toString should be ("GroupByQuery(List(SelectorFilteredAggregation(total_failed_scans,SelectFilter(edata_size,Some(0),None),LongSumAggregation(total_failed_scans,total_count))),List(2020-05-12/2020-05-13),Some(AndFilter(List(InFilter(dimensions_pdata_id,List(prod.diksha.app, prod.diksha.portal),None), NotFilter(SelectFilter(derived_loc_district,None,None))))),List(DefaultDimension(derived_loc_state,Some(state),None), DefaultDimension(derived_loc_district,Some(district),None)),All,None,None,List(),Map())") + + val json = """{"state":"Andaman & Nicobar Islands","total_failed_scans":10,"date":"2020-03-01","district":"Ahmednagar"}""" + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2020, 3, 1, 0, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResponseTimeseriesImpl.apply(results, QueryType.GroupBy) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.doQuery[DruidResponse](_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Future(druidResponse)) + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(scansQuery).collect() + druidResult.size should be (1) + druidResult.head should be ("""{"state":"Andaman & Nicobar 
Islands","total_failed_scans":10.0,"date":"2020-03-01","district":"Ahmednagar"}""") + } + + it should "give result for stream query" in { + val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())") + + val json: String = """ + { + "total_scans" : 9007, + "producer_id" : "dev.sunbird.learning.platform" + } + """ + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse))).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query,true).collect() + + druidResult.size should be (1) + druidResult.head should be ("""{"total_scans":9007.0,"producer_id":"dev.sunbird.learning.platform","date":"2019-11-28"}""") + } + + it should "give result for stream topn query" in { + val query = DruidQueryModel("topN", "telemetry-events", "2020-03-12T00:00:00+00:00/2020-05-12T00:00:00+00:00", Option("all"), + Option(List(Aggregation(Option("count"), "count", "count"))), + Option(List(DruidDimension("dialcode_channel", Option("dialcode_slug"), Option("extraction"), None, + Option(List(ExtractFn("registeredlookup", "channel")))))), + Option(List(DruidFilter("equals", "dialcode_channel", Option("012315809814749184151")))), None, None,None, None,None, Option("count")) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + + val json = """[{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}]""" + val doc: Json = parse(json).getOrElse(Json.Null); + val results = List(DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc)); + val druidResponse = DruidResult.apply(Some(ZonedDateTime.of(2019, 11, 28, 17, 0, 0, 0, ZoneOffset.UTC)), doc) + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val 
mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse))).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query,true).collect() + + druidResult.size should be (1) + druidResult.head should be ("""{"date":"2020-03-13","count":9,"dialcode_slug":"Andaman & Nicobar Islands"}""") + } + + it should "test scan query with stream" in { + + val query = DruidQueryModel("scan", "summary-rollup-syncts", "2020-03-12T00:00:00+00:00/2020-03-13T00:00:00+00:00", Option("all"), + None, None, None, None, None,Option(List("derived_loc_state","derived_loc_district")), None, None) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + + val json = """{"__time":1583971200000,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2019-03-12"}""" + val json1 = """{"__time":1583971200000,"derived_loc_state":"ka","derived_loc_district":"unknown","date":"2019-03-12"}""" + val json2 = """{"__time":1583971200000,"derived_loc_state":"apekx","derived_loc_district":"Vizag","date":"2019-03-12"}""" + val doc: Json = parse(json).getOrElse(Json.Null); + val doc1: Json = parse(json1).getOrElse(Json.Null) + val doc2: Json = parse(json2).getOrElse(Json.Null) + val druidResponse = DruidScanResult.apply(doc) + val druidResponse1 = DruidScanResult.apply(doc1) + val druidResponse2 = DruidScanResult.apply(doc2) + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List(druidResponse,druidResponse1,druidResponse2))).anyNumberOfTimes() + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query,true).collect() + + druidResult.size should be (3) + druidResult.head should be ("""{"__time":1.5839712E12,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2020-03-12"}""") + + } + + it should "test scan query without stream" in { + + val query = DruidQueryModel("scan", "summary-events", "2020-03-12T00:00:00+00:00/2020-03-13T00:00:00+00:00", Option("all"), + None, None, Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), None, None,None, None, None) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + val json = """{"__time":1583971200000,"derived_loc_state":"unknown","derived_loc_district":"unknown","date":"2019-03-12","created_for": null,"active":true}""" + val doc: Json = parse(json).getOrElse(Json.Null) + val results = List(DruidScanResult.apply(doc)); + val scanresults = DruidScanResults.apply("122",List("derived_loc_state","derived_loc_district","active"),results) + val druidResponse = DruidScanResponse.apply(List(scanresults)) + implicit val mockFc = mock[FrameworkContext] + implicit val druidConfig = mock[DruidConfig] + val mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockDruidClient.doQuery[DruidResponse](_:DruidNativeQuery)(_:DruidConfig)).expects(druidQuery, 
*).returns(Future(druidResponse)).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query).collect() + + druidResult.size should be (1) + druidResult.head should be ( + """{"created_for":"unknown","derived_loc_state":"unknown","__time":1.5839712E12,"date":"2020-03-12","derived_loc_district":"unknown","active":true}""".stripMargin) + + } + + it should "test query with stream with empty results" in { + val query = DruidQueryModel("groupBy", "telemetry-events", "2019-11-01/2019-11-02", Option("all"), Option(List(Aggregation(Option("count"), "count", ""),Aggregation(Option("total_duration"), "doubleSum", "edata_duration"))), Option(List(DruidDimension("context_pdata_id", Option("producer_id")), DruidDimension("context_pdata_pid", Option("producer_pid")))), Option(List(DruidFilter("in", "eid", None, Option(List("START", "END"))))), Option(DruidHavingFilter("lessThan", "doubleSum", 20.asInstanceOf[AnyRef])), Option(List(PostAggregation("arithmetic", "Addition", PostAggregationFields("field", ""), "+")))) + val druidQuery = DruidDataFetcher.getDruidQuery(query) + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count), DoubleSumAggregation(total_duration,edata_duration)),List(2019-11-01/2019-11-02),Some(AndFilter(List(InFilter(eid,List(START, END),None)))),List(DefaultDimension(context_pdata_id,Some(producer_id),None), DefaultDimension(context_pdata_pid,Some(producer_pid),None)),All,Some(LessThanHaving(doubleSum,20.0)),None,List(ArithmeticPostAggregation(Addition,PLUS,List(FieldAccessPostAggregation(field,None), FieldAccessPostAggregation(,None)),Some(FloatingPoint))),Map())") + + val json: String = """ + { + } + """ + val doc: Json = parse(json).getOrElse(Json.Null); + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + val mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem _).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockDruidClient.doQueryAsStream(_:DruidQuery)(_:DruidConfig)).expects(druidQuery, *).returns(Source(List())).anyNumberOfTimes() + (mockFc.getDruidClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + + val druidResult = DruidDataFetcher.getDruidData(query,true).collect() + + druidResult.size should be (0) + } + + it should "test sql query " in { + + val sqlQuery = DruidQueryModel("scan", "summary-rollup-syncts", "2020-08-23T00:00:00+00:00/2020-08-24T00:00:00+00:00", Option("all"), + None, None, None, None, None, None, Option(List(DruidSQLDimension("state",Option("LOOKUP(derived_loc_state, 'stateSlugLookup')")), + DruidSQLDimension("dimensions_pdata_id",None))),None) + + + implicit val mockFc = mock[FrameworkContext]; + implicit val druidConfig = mock[DruidConfig]; + + + val mockAKkaUtil = mock[AkkaHttpClient] + val url = String.format("%s://%s:%s%s%s", "http",AppConf.getConfig("druid.rollup.host"), + AppConf.getConfig("druid.rollup.port"),AppConf.getConfig("druid.url"),"sql") + val request = HttpRequest(method = HttpMethods.POST, + uri = url, + entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(DruidDataFetcher.getSQLDruidQuery(sqlQuery)))) + val stripString = + """{"dimensions_pdata_id":"", "state":10} + {"dimensions_pdata_id":null, "state":5} + |{"dimensions_pdata_id":"dev.portal", "state":5}""".stripMargin + val mockDruidClient = mock[DruidClient] + (mockDruidClient.actorSystem 
_).expects().returning(ActorSystem("TestQuery")).anyNumberOfTimes() + (mockFc.getDruidRollUpClient: () => DruidClient).expects().returns(mockDruidClient).anyNumberOfTimes(); + (mockAKkaUtil.sendRequest(_: HttpRequest)(_: ActorSystem)) + .expects(request,mockDruidClient.actorSystem) + .returns(Future.successful(HttpResponse(entity = HttpEntity(ByteString(stripString))))).anyNumberOfTimes(); + val response = DruidDataFetcher.executeSQLQuery(sqlQuery, mockAKkaUtil) + response.count() should be (3) + } + + "DruidDataFetcher" should "verify DruidOutput operations" in { + val json: String = + """ + { + "total_sessions" : 2000, + "total_ts" : 5, + "district" : "Nellore", + "state" : "Andhra Pradesh" + } + """ + + val output = new DruidOutput(JSONUtils.deserialize[Map[String,AnyRef]](json)) + output.size should be(4) + val output2 =output + ("count" -> 1) + output2.size should be(5) + val output3 = output - ("count") + output3.size should be(4) + output3.get("total_ts").get should be(5) + } + + + it should "test the latest_index granularity" in { + EmbeddedPostgresqlService.execute("INSERT INTO druid_segments (id,datasource,start,\"end\",used) VALUES('segment1','content-model-snapshot','2020-10-27T00:00:00.000Z','2020-10-28T00:00:00.000Z','t')") + val query = DruidQueryModel("groupBy", "content-model-snapshot", "LastDay", + Option("latest_index"), Option(List(Aggregation(Option("count"), "count", ""))), + Option(List(DruidDimension("status", Option("status")))), + None,None,None) + val druidQuery = DruidDataFetcher.getDruidQuery(query) - val druidResult = DruidDataFetcher.getDruidData(query) + druidQuery.toDebugString.contains("2020-10-27T00:00:00.000Z") should be (true) + druidQuery.toString() should be ("GroupByQuery(List(CountAggregation(count)),List(2020-10-27T00:00:00.000Z/2020-10-28T00:00:00.000Z),None,List(DefaultDimension(status,Some(status),None)),All,None,None,List(),Map())") + val query1 = DruidQueryModel("groupBy", "content-snapshot", "2019-11-01/2019-11-02", + Option("latest_index"), Option(List(Aggregation(Option("count"), "count", ""))), + Option(List(DruidDimension("status", Option("status")))), + None,None,None) + val druidQuery1 = DruidDataFetcher.getDruidQuery(query1) + druidQuery1.toDebugString.contains("2019-11-01") should be (true) - druidResult.size should be (2) - druidResult.head should be ("""{"date":"2019-11-28","count":5,"producer_id":"dev.sunbird.portal"}""") - druidResult.last should be ("""{"date":"2019-11-28","count":1,"producer_id":"local.sunbird.desktop"}""") } } diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala new file mode 100644 index 00000000..160fce60 --- /dev/null +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/EmbeddedPostgresqlService.scala @@ -0,0 +1,54 @@ +package org.ekstep.analytics.framework.util + +import java.sql.{ResultSet, Statement} + +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres +import java.sql.Connection + +object EmbeddedPostgresqlService { + + var pg: EmbeddedPostgres = null; + var connection: Connection = null; + var stmt: Statement = null; + + def start() { + println("******** Establishing The Postgress Connection *********") + pg = EmbeddedPostgres.builder().setPort(65124).start() + connection = pg.getPostgresDatabase().getConnection() + stmt = connection.createStatement() + println("connection.getClientInfo" + connection.getClientInfo) + } + 
+ def createNominationTable(): Boolean = { + val tableName: String = "druid_segments" + val query = + s""" + |CREATE TABLE IF NOT EXISTS $tableName ( + | id TEXT PRIMARY KEY, + | datasource TEXT, + | start TEXT, + | \"end\" TEXT, + | used TEXT)""".stripMargin + + execute(query) + } + + def execute(sqlString: String): Boolean = { + stmt.execute(sqlString) + } + + def executeQuery(sqlString: String): ResultSet = { + stmt.executeQuery(sqlString) + } + + def dropTable(tableName: String): Boolean = { + stmt.execute(s"DROP TABLE $tableName") + } + + def close() { + println("******** Closing The Postgress Connection *********") + stmt.close() + connection.close() + pg.close() + } +} \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala index a4404c75..b1541933 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestCommonUtil.scala @@ -11,271 +11,395 @@ import java.text.SimpleDateFormat import scala.collection.mutable.ListBuffer import org.joda.time.format.DateTimeFormat import org.ekstep.analytics.framework.Period._ +import org.joda.time.DateTimeUtils +import ing.wbaa.druid.definitions.GranularityType +import com.google.common.eventbus.Subscribe +import org.ekstep.analytics.framework.conf.AppConf class TestCommonUtil extends BaseSpec { - it should "pass test case of all methods in CommonUtil" in { - try { - //datesBetween - val from = new LocalDate("2016-01-01"); - val to = new LocalDate("2016-01-04"); - CommonUtil.datesBetween(from, to).toArray should be(Array(new LocalDate("2016-01-01"), new LocalDate("2016-01-02"), new LocalDate("2016-01-03"), new LocalDate("2016-01-04"))) - - //deleteDirectory - val path = "delete-this"; - val dir = new File(path) - val dirCreated = dir.mkdir; - dirCreated should be(true); - val fp = "delete-this/delete-this.txt"; - val f = new File(fp); - f.createNewFile(); - CommonUtil.deleteDirectory(path) - dir.isDirectory() should be(false); - f.isFile() should be(false); - - //deleteFile - val filePath = "delete-this.txt"; - val noFile = "no-file.txt" - val file = new File(filePath); - val created = file.createNewFile(); - created should be(true); - CommonUtil.deleteFile(filePath) - CommonUtil.deleteFile(noFile) - file.isFile() should be(false); - - //getAge - val dateformat = new SimpleDateFormat("dd/MM/yyyy"); - val dob = dateformat.parse("04/07/1990"); - CommonUtil.getAge(dob) should be > (25) - - //getDatesBetween - CommonUtil.getDatesBetween("2016-01-01", Option("2016-01-04")) should be(Array("2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04")) - CommonUtil.getDatesBetween("2016-01-01", None) should not be null; - - //getEvent - val line = 
"{\"eid\":\"OE_START\",\"ts\":\"2016-01-01T12:13:20+05:30\",\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; - val event = JSONUtils.deserialize[Event](line); - val line2 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; - val event2 = JSONUtils.deserialize[Event](line2); - val line3 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22+05:30\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; - val event3 = JSONUtils.deserialize[Event](line3); - val line4 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22P:ST\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; - val event4 = JSONUtils.deserialize[Event](line4); - val line5 = "{\"eid\":\"OE_START\",\"ets\":1451630600000,\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; - val event5 = JSONUtils.deserialize[Event](line5); - - //getEventDate yyyy-MM-dd'T'HH:mm:ssZZ - val evDate = 
DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZZ").parseLocalDate("2016-01-01T12:13:20+05:30").toDate; - CommonUtil.getEventDate(event) should be(evDate) - - //getEventTs - CommonUtil.getEventTS(event) should be(1451630600000L) - CommonUtil.getEventTS(event5) should be(1451630600000L) - CommonUtil.getEventSyncTS(event) should be(1451696362924L) - CommonUtil.getEventSyncTS(event2) should be(0L) - CommonUtil.getEventSyncTS(event3) should be(1451676562000L) - CommonUtil.getEventSyncTS(event4) should be(1451696362000L) - - CommonUtil.getEventTS(event2) should be(0) - - CommonUtil.getEventDate(event2) should be(null) - - //getGameId - CommonUtil.getGameId(event) should be("org.ekstep.aser.lite") - CommonUtil.getGameId(event2) should be(null) - - //getGameVersion - CommonUtil.getGameVersion(event) should be("5.7") - CommonUtil.getGameVersion(event2) should be(null) - - //getHourOfDay - CommonUtil.getHourOfDay(1447154514000L, 1447158114000L) should be(ListBuffer(11, 12)) - CommonUtil.getHourOfDay(1447154514000L, 1447000L) should be(ListBuffer(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0)) - - //getParallelization - val config = new JobConfig(null, None, None, null, None, None, Option(10), Option("testApp"), Option(false)); - CommonUtil.getParallelization(config) should be(10) - - val config2 = new JobConfig(null, None, None, null, None, None, None, Option("testApp"), Option(false)); - CommonUtil.getParallelization(config) should be(10) - - //getParallelization - val con = Option(Map("search" -> null, "filters" -> null, "sort" -> null, "model" -> null, "modelParams" -> null, "output" -> null, "parallelization" -> "10", "appName" -> "testApp", "deviceMapping" -> null)) - CommonUtil.getParallelization(con) should be(10) - - //getStartDate - CommonUtil.getStartDate(Option("2016-01-08"), 7) should be(Option("2016-01-01")) - CommonUtil.getStartDate(None, 0) should be(Option(LocalDate.fromDateFields(new Date).toString())) - - //getTimeDiff - CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d)) - CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d)) - - CommonUtil.getTimeDiff(event, event) should be(Option(0d)) - CommonUtil.getTimeDiff(event, event2) should be(Option(0d)) - - //getTimeSpent - CommonUtil.getTimeSpent("10") should be(Option(10d)) - CommonUtil.getTimeSpent(10d.asInstanceOf[AnyRef]) should be(Option(10d)) - CommonUtil.getTimeSpent(10.asInstanceOf[AnyRef]) should be(Option(10d)) - CommonUtil.getTimeSpent(null) should be(Option(0d)) - CommonUtil.getTimeSpent(true.asInstanceOf[AnyRef]) should be(Option(0d)) - - CommonUtil.getTimestamp("2016-01-02T00:59:22+P:ST") should be(1451696362000L); - - CommonUtil.roundDouble(12.7345, 2) should be(12.73); - - //gzip - val testPath = "src/test/resources/sample_telemetry.log"; - CommonUtil.gzip(testPath) - new File("src/test/resources/sample_telemetry.log.gz").isFile() should be(true) - CommonUtil.deleteFile("src/test/resources/sample_telemetry.log.gz"); - - a[Exception] should be thrownBy { - CommonUtil.gzip("src/test/resources/sample_telemetry.txt") - } - - CommonUtil.getParallelization(None) should be(10); - - CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L)) should be("1D99B2F1C6637AE21081CD981AFFB56F"); - CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "org.ekstep.aser.lite") should be("6D5DCB288B1A9BC3036D04C37FF08EDF"); - - CommonUtil.getMessageId("ME_TEST", "123", "MONTH", 1451650400000L, None, None) 
should be("D0BF57F856E3B7FAD5E47CCD4B31DE57"); - - val res = CommonUtil.time({ - - CommonUtil.getWeeksBetween(1451650400000L, 1454650400000L) should be(5) - CommonUtil.getPeriod(1451650400000L, DAY) should be(20160101) - CommonUtil.getPeriod(1451650400000L, WEEK) should be(2015753) - CommonUtil.getPeriod(1452250748000L, WEEK) should be(2016701) - CommonUtil.getPeriod(1451650400000L, MONTH) should be(201601) - CommonUtil.getPeriod(1451650400000L, CUMULATIVE) should be(0) - CommonUtil.getPeriod(1451650400000L, LAST7) should be(7) - CommonUtil.getPeriod(1451650400000L, LAST30) should be(30) - CommonUtil.getPeriod(1451650400000L, LAST90) should be(90) - CommonUtil.getPeriod(new DateTime("2016-01-01"), DAY) should be(20160101) - - }) - res._1 should be > (0L) - - //getTags - val metaData1 = Map("tags" -> List("test", "QA"), "activation_keys" -> "ptm007") - val tags1 = CommonUtil.getTags(metaData1).get - tags1.length should be(2) - - val metaData2 = Map("activation_keys" -> "ptm007", "tags" -> null) - val tags2 = CommonUtil.getTags(metaData2).get - tags2.length should be(0) - - val metaData3 = Map("activation_keys" -> "ptm007") - val tags3 = CommonUtil.getTags(metaData3).get - tags3.length should be(0) - - CommonUtil.daysBetween(new DateTime(1451650400000L).toLocalDate(), new DateTime(1454650400000L).toLocalDate()) should be(35); - } catch { - case ex: Exception => ex.printStackTrace(); - } - - CommonUtil.getPathFromURL("https://ekstep-public.s3-ap-southeast-1.amazonaws.com/ecar_files/domain_38527_1460631037666.ecar") should be("/ecar_files/domain_38527_1460631037666.ecar") - - // getPeriods - val daysArray = CommonUtil.getPeriods(DAY, 5) - daysArray.length should be(5) - - val weeksArray = CommonUtil.getPeriods(WEEK, 5) - weeksArray.length should be(5) - - val monthsArray = CommonUtil.getPeriods(MONTH, 5) - monthsArray.length should be(5) - - val cumArray = CommonUtil.getPeriods(CUMULATIVE, 5) - cumArray.length should be(1) - - CommonUtil.getPeriods("DAY", 5) - CommonUtil.getPeriods("WEEK", 5) - CommonUtil.getPeriods("MONTH", 5) - CommonUtil.getPeriods("CUMULATIVE", 5) - - //getValidTags - val dEvent1 = 
"{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q40\",\"mc\":[]}
,{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{\"app\":[]}}" - val derivedEvent1 = JSONUtils.deserialize[DerivedEvent](dEvent1); - CommonUtil.getValidTags(derivedEvent1, Array("test")) - - val dEvent2 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[
],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q40\",\"mc\":[]},{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{}}" - val derivedEvent2 = JSONUtils.deserialize[DerivedEvent](dEvent2); - CommonUtil.getValidTags(derivedEvent2, Array("test")) - - val dEvent3 = 
"{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1474439187443,\"syncts\":1474351045034,\"ver\":\"1.0\",\"mid\":\"0C2CE73054050FE7D0E03B5A71A35829\",\"uid\":\"3b81dc76-917c-4a67-9f08-1d84b201820c\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"LearnerSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1474350532673,\"to\":1474350628540}},\"dimensions\":{\"did\":\"38e8baf2f7d2fa48fd73dac95dec23348992a907\",\"gdata\":{\"id\":\"do_30043182\",\"ver\":\"10\"},\"loc\":\"\",\"group_user\":false,\"anonymous_user\":true},\"edata\":{\"eks\":{\"levels\":[],\"noOfAttempts\":1,\"timeSpent\":95.87,\"interruptTime\":8.38,\"timeDiff\":95.87,\"start_time\":1474350532673,\"end_time\":1474350628540,\"currentLevel\":{},\"noOfLevelTransitions\":-1,\"interactEventsPerMin\":107.65,\"completionStatus\":false,\"screenSummary\":[{\"id\":\"reading_word_stage\",\"timeSpent\":5.33},{\"id\":\"homeScreen\",\"timeSpent\":6.37},{\"id\":\"writing_assess_stage\",\"timeSpent\":17.09},{\"id\":\"assessment_stage_three\",\"timeSpent\":7.72},{\"id\":\"splash\",\"timeSpent\":6.79},{\"id\":\"assessment_stage_one\",\"timeSpent\":10.92},{\"id\":\"writing_stage\",\"timeSpent\":4.36},{\"id\":\"assessment_stage_two\",\"timeSpent\":4.35},{\"id\":\"endScreen\",\"timeSpent\":4.31},{\"id\":\"reading_stage\",\"timeSpent\":20.27}],\"noOfInteractEvents\":172,\"eventsSummary\":[{\"id\":\"OE_ITEM_RESPONSE\",\"count\":1},{\"id\":\"OE_START\",\"count\":1},{\"id\":\"OE_NAVIGATE\",\"count\":13},{\"id\":\"OE_INTERACT\",\"count\":171},{\"id\":\"OE_INTERRUPT\",\"count\":2},{\"id\":\"OE_ASSESS\",\"count\":23},{\"id\":\"OE_END\",\"count\":1}],\"syncDate\":1474351045034,\"contentType\":\"Story\",\"mimeType\":\"application/vnd.ekstep.ecml-archive\",\"itemResponses\":[{\"time_stamp\":1474350574049,\"score\":1,\"timeSpent\":5.0,\"mmc\":[],\"res\":[\"5:D\"],\"resValues\":[{\"5\":\"D\"}],\"itemId\":\"esl.l3q28\",\"mc\":[]},{\"time_stamp\":1474350575965,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q29\",\"mc\":[]},{\"time_stamp\":1474350577358,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q30\",\"mc\":[]},{\"time_stamp\":1474350578565,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q31\",\"mc\":[]},{\"time_stamp\":1474350579836,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q32\",\"mc\":[]},{\"time_stamp\":1474350581019,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q33\",\"mc\":[]},{\"time_stamp\":1474350582208,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q34\",\"mc\":[]},{\"time_stamp\":1474350583517,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q35\",\"mc\":[]},{\"time_stamp\":1474350584901,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q36\",\"mc\":[]},{\"time_stamp\":1474350586118,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q37\",\"mc\":[]},{\"time_stamp\":1474350601438,\"score\":0,\"timeSpent\":9.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q38\",\"mc\":[]},{\"time_stamp\":1474350602642,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q39\",\"mc\":[]},{\"time_stamp\":1474350603195,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q40\",\"mc\":[]}
,{\"time_stamp\":1474350603602,\"score\":0,\"timeSpent\":0.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q41\",\"mc\":[]},{\"time_stamp\":1474350606416,\"score\":0,\"timeSpent\":3.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q42\",\"mc\":[]},{\"time_stamp\":1474350607301,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q43\",\"mc\":[]},{\"time_stamp\":1474350608010,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q44\",\"mc\":[]},{\"time_stamp\":1474350610031,\"score\":0,\"timeSpent\":2.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q45\",\"mc\":[]},{\"time_stamp\":1474350611213,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q46\",\"mc\":[]},{\"time_stamp\":1474350621749,\"score\":0,\"timeSpent\":11.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q47\",\"mc\":[]},{\"time_stamp\":1474350622758,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q48\",\"mc\":[]},{\"time_stamp\":1474350623511,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q49\",\"mc\":[]},{\"time_stamp\":1474350624180,\"score\":0,\"timeSpent\":1.0,\"mmc\":[],\"res\":[],\"resValues\":[],\"itemId\":\"esl.l3q50\",\"mc\":[]}]}},\"etags\":{\"app\":[\"test\", \"QA\"]}}" - val derivedEvent3 = JSONUtils.deserialize[DerivedEvent](dEvent3); - val out = CommonUtil.getValidTags(derivedEvent3, Array("test")) - out.length should be(1) - - //zip - CommonUtil.zip("src/test/resources/test.zip", List("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry_2.log")) - new File("src/test/resources/test.zip").isFile() should be(true) - CommonUtil.deleteFile("src/test/resources/test.zip"); - //zip folder - //CommonUtil.zipFolder("src/test/resources/zipFolderTest.zip", "src/test/resources/1234/OE_INTERACT") - //new File("src/test/resources/zipFolderTest.zip").isFile() should be(true) - //CommonUtil.deleteFile("src/test/resources/zipFolderTest.zip"); - - //ccToMap - val x = CommonUtil.caseClassToMap(DerivedEvent) - - //zip dir - CommonUtil.zipDir("src/test/resources/test.zip", "src/test/resources/1234") - new File("src/test/resources/test.zip").isFile() should be(true) - CommonUtil.deleteFile("src/test/resources/test.zip"); - - //getChanneId - val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" - val channelId = CommonUtil.getChannelId(JSONUtils.deserialize[Event](event)) - channelId should be("in.ekstep") - - val drivedEvent = 
"{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}" - val channelId1 = CommonUtil.getChannelId(JSONUtils.deserialize[DerivedEvent](drivedEvent)) - channelId1 should be("in.ekstep") - - val profileEvent = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}" - val channelId2 = CommonUtil.getChannelId(JSONUtils.deserialize[ProfileEvent](profileEvent)) - channelId2 should be("in.ekstep") - - // getAppDetails - val event1 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", 
\"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" - val appId = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event)) - appId.id should be("genie") - - val drivedEvent1 = "{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}" - val appId1 = CommonUtil.getAppDetails(JSONUtils.deserialize[DerivedEvent](drivedEvent)) - appId1.id should be("genie") - - val profileEvent1 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read 
story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}" - val appId2 = CommonUtil.getAppDetails(JSONUtils.deserialize[ProfileEvent](profileEvent1)) - appId2.id should be("genie") - - //getEndTimestampOfDay - val time = CommonUtil.getEndTimestampOfDay("2016-01-02") - time.toString() should be ("1451759399000") - - // dayPeriodToLong - val dayPeriodToLong = CommonUtil.dayPeriodToLong(20170713) - dayPeriodToLong.toString should be("1499904000000") - - // getWeeksBetween - val getWeeksBetween = CommonUtil.getWeeksBetween(1499904L, 1451759399L) - getWeeksBetween should be(2) - - // getMetricEvent - val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> "test", "metrics" -> List(V3MetricEdata("count", "100".asInstanceOf[AnyRef]))), "pipeline-monitoring", "dataproduct-metric") - metricEvent.context.pdata.get.id should be("pipeline-monitoring") - metricEvent.context.pdata.get.pid.get should be("dataproduct-metric") - - val epochToTimestamp = CommonUtil.getTimestampFromEpoch(1537550355883L) - epochToTimestamp.toString should be("2018-09-21 17:19:15.883") - - val connectionProperties = CommonUtil.getPostgresConnectionProps() - connectionProperties.getProperty("user") should be("postgres") - connectionProperties.getProperty("password") should be("postgres") - connectionProperties.getProperty("driver") should be("org.postgresql.Driver") + private case class TestCaseClass(mid: String, date: DateTime); + + class TestEventListener() { + var event: String = _; + @Subscribe def onMessage(event: String) { + this.event = event; + } + } + + it should "pass test case of all methods in CommonUtil" in { + try { + //datesBetween + val from = new LocalDate("2016-01-01"); + val to = new LocalDate("2016-01-04"); + CommonUtil.datesBetween(from, to).toArray should be(Array(new LocalDate("2016-01-01"), new LocalDate("2016-01-02"), new LocalDate("2016-01-03"), new LocalDate("2016-01-04"))) + + //deleteDirectory + val path = "delete-this"; + val dir = new File(path) + val dirCreated = dir.mkdir; + dirCreated should be(true); + val fp = "delete-this/delete-this.txt"; + val f = new File(fp); + f.createNewFile(); + CommonUtil.deleteDirectory(path) + dir.isDirectory() should be(false); + f.isFile() should be(false); + + val sc = CommonUtil.getSparkContext(1, "test", None, None); + (new HadoopFileUtil()).delete("delete-this/delete-this.txt", sc.hadoopConfiguration); + sc.stop(); + + //deleteFile + val filePath = "delete-this.txt"; + val noFile = "no-file.txt" + val file = new File(filePath); + val created = file.createNewFile(); + created should be(true); + CommonUtil.deleteFile(filePath) + CommonUtil.deleteFile(noFile) + file.isFile() should be(false); + + //getAge + val dateformat = new SimpleDateFormat("dd/MM/yyyy"); + val dob = dateformat.parse("04/07/1990"); + CommonUtil.getAge(dob) should be > (25) + + //getDatesBetween + CommonUtil.getDatesBetween("2016-01-01", 
Option("2016-01-04")) should be(Array("2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04")) + CommonUtil.getDatesBetween("2016-01-01", None) should not be null; + + //getEvent + val line = "{\"eid\":\"OE_START\",\"ts\":\"2016-01-01T12:13:20+05:30\",\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; + val event = JSONUtils.deserialize[Event](line); + val line2 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; + val event2 = JSONUtils.deserialize[Event](line2); + val line3 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22+05:30\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; + val event3 = JSONUtils.deserialize[Event](line3); + val line4 = "{\"eid\":\"OE_START\",\"ts\":\"01-01-2016\",\"@timestamp\":\"2016-01-02T00:59:22P:ST\",\"ver\":\"1.0\",\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; + val event4 = JSONUtils.deserialize[Event](line4); + val line5 = 
"{\"eid\":\"OE_START\",\"ets\":1451630600000,\"@timestamp\":\"2016-01-02T00:59:22.924Z\",\"ver\":\"1.0\",\"gdata\":{\"id\":\"org.ekstep.aser.lite\",\"ver\":\"5.7\"},\"sid\":\"a6e4b3e2-5c40-4d5c-b2bd-44f1d5c7dd7f\",\"uid\":\"2ac2ebf4-89bb-4d5d-badd-ba402ee70182\",\"did\":\"828bd4d6c37c300473fb2c10c2d28868bb88fee6\",\"edata\":{\"eks\":{\"loc\":null,\"mc\":null,\"mmc\":null,\"pass\":null,\"qid\":null,\"qtype\":null,\"qlevel\":null,\"score\":0,\"maxscore\":0,\"res\":null,\"exres\":null,\"length\":null,\"exlength\":0.0,\"atmpts\":0,\"failedatmpts\":0,\"category\":null,\"current\":null,\"max\":null,\"type\":null,\"extype\":null,\"id\":null,\"gid\":null}}}"; + val event5 = JSONUtils.deserialize[Event](line5); + + //getEventDate yyyy-MM-dd'T'HH:mm:ssZZ + val evDate = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ssZZ").parseLocalDate("2016-01-01T12:13:20+05:30").toDate; + CommonUtil.getEventDate(event) should be(evDate) + + //getEventTs + CommonUtil.getEventTS(event) should be(1451630600000L) + CommonUtil.getEventTS(event5) should be(1451630600000L) + CommonUtil.getEventSyncTS(event) should be(1451696362924L) + CommonUtil.getEventSyncTS(event2) should be(0L) + CommonUtil.getEventSyncTS(event3) should be(1451676562000L) + CommonUtil.getEventSyncTS(event4) should be(1451696362000L) + + CommonUtil.getEventTS(event2) should be(0) + + CommonUtil.getEventDate(event2) should be(null) + + //getGameId + CommonUtil.getGameId(event) should be("org.ekstep.aser.lite") + CommonUtil.getGameId(event2) should be(null) + + //getGameVersion + CommonUtil.getGameVersion(event) should be("5.7") + CommonUtil.getGameVersion(event2) should be(null) + + //getHourOfDay + CommonUtil.getHourOfDay(1447154514000L, 1447158114000L) should be(ListBuffer(11, 12)) + CommonUtil.getHourOfDay(1447154514000L, 1447000L) should be(ListBuffer(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0)) + + //getParallelization + val config = new JobConfig(null, None, None, null, None, None, Option(10), Option("testApp"), Option(false)); + CommonUtil.getParallelization(config) should be(10) + + val config2 = new JobConfig(null, None, None, null, None, None, None, Option("testApp"), Option(false)); + CommonUtil.getParallelization(config) should be(10) + + //getParallelization + val con = Option(Map("search" -> null, "filters" -> null, "sort" -> null, "model" -> null, "modelParams" -> null, "output" -> null, "parallelization" -> "10", "appName" -> "testApp", "deviceMapping" -> null)) + CommonUtil.getParallelization(con) should be(10) + + //getStartDate + CommonUtil.getStartDate(Option("2016-01-08"), 7) should be(Option("2016-01-01")) + CommonUtil.getStartDate(None, 0) should be(Option(LocalDate.fromDateFields(new Date).toString())) + + //getTimeDiff + CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d)) + CommonUtil.getTimeDiff(1451650400000L, 1451650410000L) should be(Option(10d)) + + CommonUtil.getTimeDiff(event, event) should be(Option(0d)) + CommonUtil.getTimeDiff(event, event2) should be(Option(0d)) + + //getTimeSpent + CommonUtil.getTimeSpent("10") should be(Option(10d)) + CommonUtil.getTimeSpent(10d.asInstanceOf[AnyRef]) should be(Option(10d)) + CommonUtil.getTimeSpent(10.asInstanceOf[AnyRef]) should be(Option(10d)) + CommonUtil.getTimeSpent(null) should be(Option(0d)) + CommonUtil.getTimeSpent(true.asInstanceOf[AnyRef]) should be(Option(0d)) + + CommonUtil.getTimestamp("2016-01-02T00:59:22+P:ST") should be(1451696362000L); + + CommonUtil.roundDouble(12.7345, 2) should be(12.73); + + //gzip + val testPath = 
"src/test/resources/sample_telemetry.log"; + CommonUtil.gzip(testPath) + new File("src/test/resources/sample_telemetry.log.gz").isFile() should be(true) + CommonUtil.deleteFile("src/test/resources/sample_telemetry.log.gz"); + + a[Exception] should be thrownBy { + CommonUtil.gzip("src/test/resources/sample_telemetry.txt") + } + + CommonUtil.getParallelization(None) should be(10); + + CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L)) should be("1D99B2F1C6637AE21081CD981AFFB56F"); + CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "org.ekstep.aser.lite") should be("6D5DCB288B1A9BC3036D04C37FF08EDF"); + + CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "content1", Option("app1"), Option("channel1"), "device1") should be("4DE94D28FB211D935B70DADBEB8B45EA"); + CommonUtil.getMessageId("ME_TEST", "123", "MONTH", DtRange(1451650400000L, 1451650400000L), "content1", None, None, "device1") should be("B5D001443E9BEFF7884FFB1F9B2A5CAD"); + + CommonUtil.getMessageId("ME_TEST", "INFO", 1451650400000L, Option("sunbird.app"), None) should be("C0D5CA578D9F8889CDB2C09FF4899FAC"); + CommonUtil.getMessageId("ME_TEST", "INFO", 1451650400000L, None, Option("testchannel")) should be("6625F709DD90A7423F0332826DE0F386"); + + CommonUtil.getMessageId("ME_TEST", "123", "MONTH", 1451650400000L, None, None) should be("D0BF57F856E3B7FAD5E47CCD4B31DE57"); + + val res = CommonUtil.time({ + + CommonUtil.getWeeksBetween(1451650400000L, 1454650400000L) should be(5) + CommonUtil.getPeriod(1451650400000L, DAY) should be(20160101) + CommonUtil.getPeriod(1451650400000L, WEEK) should be(2015753) + CommonUtil.getPeriod(1452250748000L, WEEK) should be(2016701) + CommonUtil.getPeriod(1451650400000L, MONTH) should be(201601) + CommonUtil.getPeriod(1451650400000L, CUMULATIVE) should be(0) + CommonUtil.getPeriod(1451650400000L, LAST7) should be(7) + CommonUtil.getPeriod(1451650400000L, LAST30) should be(30) + CommonUtil.getPeriod(1451650400000L, LAST90) should be(90) + CommonUtil.getPeriod(new DateTime("2016-01-01"), DAY) should be(20160101) + + }) + res._1 should be > (0L) + + //getTags + val metaData1 = Map("tags" -> List("test", "QA"), "activation_keys" -> "ptm007") + val tags1 = CommonUtil.getTags(metaData1).get + tags1.length should be(2) + + val metaData2 = Map("activation_keys" -> "ptm007", "tags" -> null) + val tags2 = CommonUtil.getTags(metaData2).get + tags2.length should be(0) + + val metaData3 = Map("activation_keys" -> "ptm007") + val tags3 = CommonUtil.getTags(metaData3).get + tags3.length should be(0) + + CommonUtil.daysBetween(new DateTime(1451650400000L).toLocalDate(), new DateTime(1454650400000L).toLocalDate()) should be(35); + } catch { + case ex: Exception => ex.printStackTrace(); + } + + CommonUtil.getPathFromURL("https://ekstep-public.s3-ap-southeast-1.amazonaws.com/ecar_files/domain_38527_1460631037666.ecar") should be("/ecar_files/domain_38527_1460631037666.ecar") + + // getPeriods + val daysArray = CommonUtil.getPeriods(DAY, 5) + daysArray.length should be(5) + + val weeksArray = CommonUtil.getPeriods(WEEK, 5) + weeksArray.length should be(5) + + val monthsArray = CommonUtil.getPeriods(MONTH, 5) + monthsArray.length should be(5) + + val cumArray = CommonUtil.getPeriods(CUMULATIVE, 5) + cumArray.length should be(1) + + CommonUtil.getPeriods("DAY", 5) + CommonUtil.getPeriods("WEEK", 5) + CommonUtil.getPeriods("MONTH", 5) + CommonUtil.getPeriods("CUMULATIVE", 5) + + //zip + 
CommonUtil.zip("src/test/resources/test.zip", List("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry_2.log")) + new File("src/test/resources/test.zip").isFile() should be(true) + CommonUtil.deleteFile("src/test/resources/test.zip"); + //zip folder + //CommonUtil.zipFolder("src/test/resources/zipFolderTest.zip", "src/test/resources/1234/OE_INTERACT") + //new File("src/test/resources/zipFolderTest.zip").isFile() should be(true) + //CommonUtil.deleteFile("src/test/resources/zipFolderTest.zip"); + + //ccToMap + val x = CommonUtil.caseClassToMap(DerivedEvent) + + //zip dir + CommonUtil.zipDir("src/test/resources/test.zip", "src/test/resources/1234") + new File("src/test/resources/test.zip").isFile() should be(true) + CommonUtil.deleteFile("src/test/resources/test.zip"); + + //getChanneId + val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"sunbird\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" + val channelId = CommonUtil.getChannelId(JSONUtils.deserialize[Event](event)) + channelId should be("sunbird") + + CommonUtil.getChannelId(JSONUtils.deserialize[Event]("{\"eid\":\"OE_INTERACT\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}")) should be("in.ekstep") + + val drivedEvent = 
"{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}" + val channelId1 = CommonUtil.getChannelId(JSONUtils.deserialize[DerivedEvent](drivedEvent)) + channelId1 should be("in.ekstep") + + val profileEvent = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}" + val channelId2 = CommonUtil.getChannelId(JSONUtils.deserialize[ProfileEvent](profileEvent)) + channelId2 should be("in.ekstep") + + CommonUtil.getChannelId("") should be("in.ekstep") + + CommonUtil.getChannelId(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", Option("2.0"))), null, None, None, None, None), None, null)) should be("in.ekstep") + CommonUtil.getChannelId(new V3Event(null, 0l, null, null, null, null, V3Context("sunbird", Option(V3PData("sunbird.app", None)), null, None, None, None, 
None), None, null)) should be("sunbird") + CommonUtil.getChannelId(DerivedEvent(null, 0l, 0l, null, null, null, "sunbird", None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0"))), null)) should be("sunbird") + CommonUtil.getChannelId(DerivedEvent(null, 0l, 0l, null, null, null, "sunbird", None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0")), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, Option("sunbird")), null)) should be("sunbird") + CommonUtil.getChannelId(new ProfileEvent(null, null, null, null, null, null, null, null, Option(new PData("sunbird.app", "2.0")), Option("sunbird"), null)) should be("sunbird") + + // getAppDetails + val event1 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" + val appId = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event)) + appId.id should be("genie") + + val event2 = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"pdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" + val appId3 = CommonUtil.getAppDetails(JSONUtils.deserialize[Event](event2)) + appId3.id should be("org.ekstep.story.en.haircut") + + val drivedEvent1 = 
"{\"eid\":\"ME_CE_SESSION_SUMMARY\",\"ets\":1495515314134,\"syncts\":1495456436116,\"ver\":\"1.0\",\"mid\":\"37E9E91997249D12F06C1D4869E286DE\",\"uid\":\"562\",\"content_id\":\"do_2122315986551685121193\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"ContentEditorSessionSummary\",\"ver\":\"1.0\"},\"granularity\":\"SESSION\",\"date_range\":{\"from\":1495456435738,\"to\":1495456436116}},\"dimensions\":{\"sid\":\"5edg6dsos4bun8q8utp0k9gqa0\"},\"edata\":{\"eks\":{\"interact_events_per_min\":0.0,\"start_time\":1495456435738,\"plugin_summary\":{\"loaded_count\":0,\"plugins_added\":0,\"plugins_removed\":0,\"plugins_modified\":0,\"per_plugin_summary\":[]},\"menu_events_count\":0,\"interact_events_count\":0,\"end_time\":1495456436116,\"events_summary\":[{\"id\":\"CE_API_CALL\",\"count\":3}],\"sidebar_events_count\":0,\"time_diff\":0.38,\"api_calls_count\":3,\"stage_summary\":{\"stages_added\":0,\"stages_removed\":0,\"stages_modified\":0},\"load_time\":0.0,\"save_summary\":{\"total_count\":0,\"success_count\":0,\"failed_count\":0},\"time_spent\":0.38}}}" + val appId1 = CommonUtil.getAppDetails(JSONUtils.deserialize[DerivedEvent](drivedEvent)) + appId1.id should be("genie") + + val profileEvent1 = "{\"eid\":\"ME_SESSION_SUMMARY\",\"ets\":1453207660735,\"syncts\":1453207660735,\"ver\":\"1.0\",\"uid\":\"8b4f3775-6f65-4abf-9afa-b15b8f82a24b\",\"context\":{\"pdata\":{\"id\":\"AnalyticsDataPipeline\",\"model\":\"GenericSessionSummarizer\",\"ver\":\"1.1\"},\"granularity\":\"SESSION\",\"dt_range\":{\"from\":1450079174000,\"to\":1450079337000}},\"dimensions\":{\"gdata\":{\"id\":\"org.ekstep.aser\",\"ver\":\"5.6.1\"},\"loc\":\"22.6370684,77.5506687\"},\"edata\":{\"eks\":{\"startTime\":1450079174000,\"noOfLevelTransitions\":1,\"levels\":[{\"choices\":[],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can do subtraction\"},{\"choices\":[\"q_4_s_hindi\",\"q_sub_q1127\",\"q_sub_q1126\"],\"domain\":\"\",\"noOfAttempts\":1,\"level\":\"Can read story\"}],\"activitySummary\":{\"TOUCH\":{\"count\":21,\"timeSpent\":161.0}},\"noOfAttempts\":1,\"timeSpent\":6206.0,\"interactEventsPerMin\":0.2,\"endTime\":1450079337000,\"eventsSummary\":{\"OE_START\":1,\"OE_INTERACT\":21,\"OE_ASSESS\":3,\"OE_END\":1,\"OE_LEVEL_SET\":2},\"currentLevel\":{\"numeracy\":\"Can do subtraction\",\"literacy\":\"Can read story\"},\"noOfInteractEvents\":21,\"interruptTime\":0.0,\"itemResponses\":[{\"itemId\":\"q_4_s_hindi\",\"itype\":\"recognition\",\"ilevel\":\"MEDIUM\",\"timeSpent\":29.0,\"res\":[\"अत्चा\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079266000,\"maxScore\":1,\"domain\":\"literacy\"},{\"itemId\":\"q_sub_q1127\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":33.0,\"res\":[\"49\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079299000,\"maxScore\":1,\"domain\":\"numeracy\"},{\"itemId\":\"q_sub_q1126\",\"itype\":\"ftb\",\"ilevel\":\"MEDIUM\",\"timeSpent\":27.0,\"res\":[\"17\"],\"mc\":[],\"score\":1,\"timeStamp\":1450079322000,\"maxScore\":1,\"domain\":\"numeracy\"}]}}}" + val appId2 = CommonUtil.getAppDetails(JSONUtils.deserialize[ProfileEvent](profileEvent1)) + appId2.id should be("genie") + + CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", Option("2.0"))), null, None, None, None, None), None, null)).id should be("sunbird.app") + CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, Option(V3PData("sunbird.app", None)), null, None, None, None, None), None, null)).id should be("sunbird.app") + 
CommonUtil.getAppDetails(new V3Event(null, 0l, null, null, null, null, V3Context(null, None, null, None, None, None, None), None, null)).id should be("genie") + + CommonUtil.getAppDetails(new ProfileEvent(null, null, null, null, null, null, null, null, Option(new PData("sunbird.app", "2.0")), None, null)).id should be("sunbird.app") + CommonUtil.getAppDetails(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, Dimensions(None, None, None, None, None, None, Option(PData("sunbird.app", "1.0"))), null)).id should be("sunbird.app") + CommonUtil.getAppDetails("").id should be("genie"); + + //getEndTimestampOfDay + val time = CommonUtil.getEndTimestampOfDay("2016-01-02") + time.toString() should be("1451759399000") + + // dayPeriodToLong + val dayPeriodToLong = CommonUtil.dayPeriodToLong(20170713) + dayPeriodToLong.toString should be("1499904000000") + + // getWeeksBetween + val getWeeksBetween = CommonUtil.getWeeksBetween(1499904L, 1451759399L) + getWeeksBetween should be(2) + + // getMetricEvent + val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> "test", "metrics" -> List(V3MetricEdata("count", "100".asInstanceOf[AnyRef]))), "pipeline-monitoring", "dataproduct-metric") + metricEvent.context.pdata.get.id should be("pipeline-monitoring") + metricEvent.context.pdata.get.pid.get should be("dataproduct-metric") + + val epochToTimestamp = CommonUtil.getTimestampFromEpoch(1537550355883L) + epochToTimestamp.toString should be("2018-09-21 17:19:15.883") + + val connectionProperties = CommonUtil.getPostgresConnectionProps() + connectionProperties.getProperty("user") should be("postgres") + connectionProperties.getProperty("password") should be("postgres") + connectionProperties.getProperty("driver") should be("org.postgresql.Driver") + + implicit val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0")) + val defaultCaseConf = CommonUtil.setStorageConf("local", Option(""), Option("")) + + val azureStorageConf = CommonUtil.setStorageConf("azure", Option("azure_storage_key"), Option("azure_storage_secret")) + azureStorageConf.get("fs.azure") should be("org.apache.hadoop.fs.azure.NativeAzureFileSystem") + azureStorageConf.get("fs.azure.account.key.azure-test-key.blob.core.windows.net") should be("azure-test-secret") + + val s3StorageConf = CommonUtil.setStorageConf("s3", Option("aws_storage_key"), Option("aws_storage_secret")) + s3StorageConf.get("fs.s3n.awsAccessKeyId") should be("aws-test-key") + s3StorageConf.get("fs.s3n.awsSecretAccessKey") should be("aws-test-secret") + + val fileUtil = new HadoopFileUtil; + val copiedFile = fileUtil.copy("src/test/resources/sample_telemetry.log", "src/test/resources/sample_telemetry.json", sc.hadoopConfiguration) + sc.textFile(copiedFile, 1).count() should be (7437) + fileUtil.delete(sc.hadoopConfiguration, copiedFile) + + sc.stop() + } + + it should "test all the exception branches" in { + + noException should be thrownBy { + val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0")); + sc.stop(); + } + + noException should be thrownBy { + val sc = CommonUtil.getSparkContext(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), Option("10.0.0.0"), Option("2")); + sc.stop(); + } + + noException should be thrownBy { + val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), Option("Quorum")) + sc.stop(); } + + noException should be thrownBy { + val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), 
Option("10.0.0.0"), Option("Quorum"), Option("10.0.0.0"), Option("2")) + sc.stop(); + } + + noException should be thrownBy { + val sc = CommonUtil.getSparkSession(10, "Test", Option("10.0.0.0"), Option("10.0.0.0"), None) + sc.stop(); + } + + val event = "{\"eid\":\"OE_INTERACT\", \"channel\": \"in.ekstep\", \"ts\":\"2016-05-05T11:13:04.305+0530\",\"ets\":1462426984305,\"ver\":\"2.0\",\"gdata\":{\"id\":\"org.ekstep.story.en.haircut\",\"ver\":\"1\"},\"sid\":\"2b927be8-6a74-460b-aa20-0c991bcf57f6\",\"uid\":\"40550853-c88c-4f6b-8d33-88d0f47c32f4\",\"did\":\"d601e461a64b06f8828886e2f740e1688491a0a8\",\"edata\":{\"eks\":{\"score\":0,\"atmpts\":0,\"failedatmpts\":0,\"type\":\"LISTEN\",\"extype\":\"\",\"id\":\"splash:cover_sound\",\"stageid\":\"splash\",\"uri\":\"\",\"subtype\":\"PLAY\",\"pos\":[],\"values\":[],\"tid\":\"\",\"rating\":0.0}},\"tags\":[{\"genie\":[\"becb887fe82f24c644482eb30041da6d88bd8150\"]}],\"metadata\":{\"sync_timestamp\":\"2016-11-19T23:12:28+00:00\",\"public\":\"true\"},\"@timestamp\":\"2016-11-09T08:16:35.699Z\"}" + val v3Event = JSONUtils.deserialize[V3Event](event); + CommonUtil.getEventSyncTS(v3Event) should be(1478679395699l); + + CommonUtil.getFrameworkContext(None) should not be (null) + + noException should be thrownBy { + CommonUtil.deleteDirectory("src/test/resources/abcdefg") + } + + CommonUtil.createDirectory("src/test/resources/abcdefg") + val f = new File("src/test/resources/abcdefg") + f.exists() should be(true) + CommonUtil.deleteDirectory("src/test/resources/abcdefg") + + CommonUtil.getValidTagsForWorkflow(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, null, null, None, Option(List("tag1", "tag2"))), Array("tag1")).head should be("tag1") + CommonUtil.getValidTagsForWorkflow(DerivedEvent(null, 0l, 0l, null, null, null, null, None, None, null, null, null, None, None), Array("tag1")).size should be(0) + + val map = CommonUtil.caseClassToMapWithDateConversion(TestCaseClass("mid1", DateTime.now())) + map.get("mid").get should be("mid1"); + + CommonUtil.dayPeriodToLong(2020) should be(0) + + CommonUtil.getTimestampOfDayPeriod(20200101) should be(1577836800000l) + + CommonUtil.avg(List(3, 4, 5)) should be(4) + + DateTimeUtils.setCurrentMillisFixed(1577836800000L); + CommonUtil.getIntervalRange("LastDay", "telemetry-rollup-syncts") should be("2019-12-31T00:00:00+00:00/2020-01-01T00:00:00+00:00") + CommonUtil.getIntervalRange("LastDay", "summary-rollup-syncts") should be("2019-12-31T00:00:00+00:00/2020-01-01T00:00:00+00:00") + CommonUtil.getIntervalRange("LastWeek","telemetry-rollup-syncts") should be("2019-12-23T05:30:00+00:00/2019-12-30T05:30:00+00:00") + CommonUtil.getIntervalRange("LastMonth","telemetry-rollup-syncts") should be("2019-12-01T05:30:00+00:00/2020-01-01T05:30:00+00:00") + CommonUtil.getIntervalRange("Last7Days", "telemetry-rollup-syncts") should be("2019-12-25T00:00:00+00:00/2020-01-01T00:00:00+00:00") + CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts") should be("2019-12-02T00:00:00+00:00/2020-01-01T00:00:00+00:00") + CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts", 0) should be("2019-12-02T00:00:00+00:00/2020-01-01T00:00:00+00:00") + CommonUtil.getIntervalRange("Last30Days", "telemetry-rollup-syncts", 2) should be("2019-11-30T00:00:00+00:00/2019-12-30T00:00:00+00:00") + CommonUtil.getIntervalRange("Last60Days", "telemetry-rollup-syncts") should be("Last60Days") + DateTimeUtils.setCurrentMillisSystem(); + + CommonUtil.getGranularity("") should be(GranularityType.All) + + val eventListener = 
new TestEventListener(); + EventBusUtil.register(eventListener) + EventBusUtil.dipatchEvent("Test Event"); + eventListener.event should be("Test Event") + } + } \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala new file mode 100644 index 00000000..1ea84eee --- /dev/null +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestDatasetUtil.scala @@ -0,0 +1,92 @@ +package org.ekstep.analytics.framework.util + +import org.ekstep.analytics.framework._ +import org.joda.time.LocalDate +import java.io.File + +import org.joda.time.DateTime +import java.util.Date +import java.text.SimpleDateFormat + +import org.apache.hadoop.fs.Path + +import scala.collection.mutable.ListBuffer +import org.joda.time.format.DateTimeFormat +import org.ekstep.analytics.framework.Period._ +import org.apache.spark.sql.Encoders +import org.ekstep.analytics.framework.util.DatasetUtil.extensions +import org.apache.hadoop.fs.azure.AzureException +import org.apache.hadoop.fs.s3.S3Exception +import org.apache.spark.sql.functions.col + +class TestDatasetUtil extends BaseSpec { + + "DatasetUtil" should "test the dataset extensions" in { + + val fileUtil = new HadoopFileUtil(); + val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None); + val rdd = sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1); + + import sparkSession.implicits._ + val df = sparkSession.createDataFrame(rdd); + df.saveToBlobStore(StorageConfig("local", null, "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env"))); + + val rdd2 = sparkSession.sparkContext.textFile("src/test/resources/test-report/env1.csv", 1).collect(); + rdd2.head should be ("time_spent,count") + rdd2.last should be ("32.1,4") + + df.saveToBlobStore(StorageConfig("local", null, "src/test/resources"), "csv", "test-report2", None, None); + val rdd3 = sparkSession.sparkContext.textFile("src/test/resources/test-report2.csv", 1).collect(); + rdd3.head should be ("env1,22.1,3") + rdd3.last should be ("env1,32.1,4") + + fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "src/test/resources/test-report", "src/test/resources/test-report2", "src/test/resources/test-report2.csv"); + sparkSession.stop(); + } + + it should "test exception branches" in { + + val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None); + val rdd = sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1); + + import sparkSession.implicits._ + val df = sparkSession.createDataFrame(rdd); + a[AzureException] should be thrownBy { + df.saveToBlobStore(StorageConfig("azure", "test-container", "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env"))); + } + + a[S3Exception] should be thrownBy { + df.saveToBlobStore(StorageConfig("s3", "test-container", "src/test/resources"), "csv", "test-report", Option(Map("header" -> "true")), Option(Seq("env"))); + } + + sparkSession.stop(); + } + + "DatasetUtil" should "test the dataset copy functionality" in { + + val fileUtil = new HadoopFileUtil(); + val sparkSession = CommonUtil.getSparkSession(1, "TestDatasetUtil", None, None, None); + val rdd = 
sparkSession.sparkContext.parallelize(Seq(EnvSummary("env1", 22.1, 3), EnvSummary("env2", 20.1, 3), EnvSummary("env1", 32.1, 4)), 1); + + val tempDir = "src/test/resources/test-report/_tmp" + + val partitioningColumns = Option(Seq("env")); + val dims = partitioningColumns.getOrElse(Seq()); + val options = Option(Map("header" -> "true")) + val df = sparkSession.createDataFrame(rdd); + val conf = sparkSession.sparkContext.hadoopConfiguration + val filePrefix = "" + val format = "csv" + val srcFS=new Path("src/test/resources/test-report/_tmp/env=env1") + val srcDir = srcFS.getFileSystem(conf) + fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "" + tempDir) + val opts = options.getOrElse(Map()); + df.coalesce(1).write.format(format).options(opts).partitionBy(dims: _*).save(filePrefix + tempDir); + fileUtil.copyMerge("" + "src/test/resources/test-report/_tmp/env=env1", "src/test/resources/test-report/env2.csv", sparkSession.sparkContext.hadoopConfiguration, false); + srcDir.delete(new Path("src/test/resources/test-report/_tmp/env=env1"), true) + fileUtil.delete(sparkSession.sparkContext.hadoopConfiguration, "src/test/resources/test-report", "src/test/resources/test-report2", "src/test/resources/test-report2.csv"); + fileUtil.copyMerge("" + "src/test/resources/test-report/_tmp/env=env1", "src/test/resources/test-report/env2.csv", sparkSession.sparkContext.hadoopConfiguration, false); + sparkSession.stop(); + + } +} \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala index a85625d8..81174803 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestJobLogger.scala @@ -44,6 +44,8 @@ class TestJobLogger extends BaseSpec { JobLogger.log("testing warn method", None, WARN); JobLogger.log("testing error method", None, ERROR); + JobLogger.logEvent("test event method", "org.ekstep.analytics", WARN) + JobLogger.logEvent("test event method", "org.ekstep.analytics", DEBUG) } } \ No newline at end of file diff --git a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala index c9130780..f78927b5 100644 --- a/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala +++ b/analytics-core/src/test/scala/org/ekstep/analytics/framework/util/TestRestUtil.scala @@ -1,16 +1,18 @@ package org.ekstep.analytics.framework.util +import akka.actor.ActorSystem +import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpMethods, HttpRequest} import org.ekstep.analytics.framework.BaseSpec import org.ekstep.analytics.framework.Metadata import org.ekstep.analytics.framework.Request import org.ekstep.analytics.framework.Response import org.ekstep.analytics.framework.Search import org.ekstep.analytics.framework.SearchFilter - import com.fasterxml.jackson.core.JsonParseException import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.Params import com.google.common.net.InetAddresses +import org.ekstep.analytics.framework.fetcher.AkkaHttpUtil /** * @author Santhosh @@ -34,16 +36,20 @@ class TestRestUtil extends BaseSpec { val url = "https://httpbin.org/xml"; val response = RestUtil.get[GetR](url); response should be(null); + + val url2 = "https://httpbin.org/xml"; 
+ val response2 = RestUtil.get[String](url2, Option(Map("Content-Type" -> "application/json"))); + response2 should not be(null); } // TODO: Need to fix the Test cases with proper request - /*it should "execute POST and parse response" in { + it should "execute POST and parse response" in { val url = "https://httpbin.org/post?type=test"; - val response = RestUtil.post[PostR](url, ""); + val response = RestUtil.post[PostR](url, "", Option(Map("accept" -> "application/json"))); response should not be null; response.url should be("https://httpbin.org/post?type=test"); InetAddresses.isInetAddress(response.origin) should be(true); - } */ + } it should "throw Exception if unable to parse the response during POST" in { val url = "https://httpbin.org/post?type=test"; @@ -53,16 +59,43 @@ class TestRestUtil extends BaseSpec { } // TODO: Need to fix the Test cases with proper request - /*it should "execute PATCH and parse response" in { + it should "execute PATCH and parse response" in { val url = "https://httpbin.org/patch?type=test"; val request = Map("popularity" -> 1); - val response = RestUtil.patch[PostR](url, JSONUtils.serialize(request)); + val response = RestUtil.patch[PostR](url, JSONUtils.serialize(request), Option(Map("accept" -> "application/json"))); response should not be null; response.url should be("https://httpbin.org/patch?type=test"); InetAddresses.isInetAddress(response.origin) should be(true); response.data should be("{\"popularity\":1}"); response.json.get("popularity").get should be(1); - } */ + } + + it should "execute PUT and parse response" in { + val url = "https://httpbin.org/put?type=test"; + val request = Map("popularity" -> 1); + val response = RestUtil.put[PostR](url, JSONUtils.serialize(request), Option(Map("accept" -> "application/json"))); + response should not be null; + response.url should be("https://httpbin.org/put?type=test"); + InetAddresses.isInetAddress(response.origin) should be(true); + response.data should be("{\"popularity\":1}"); + response.json.get("popularity").get should be(1); + + val url2 = "https://httpbin.org/put?type=test"; + val response2 = RestUtil.put[PostErrR](url2, JSONUtils.serialize(request)); + response2 should be(null); + } + + it should "execute Delete and parse response" in { + val url = "https://httpbin.org/delete"; + val response = RestUtil.delete[PostR](url, Option(Map("accept" -> "application/json"))); + response should not be null; + response.url should be("https://httpbin.org/delete"); + InetAddresses.isInetAddress(response.origin) should be(true); + + val url2 = "https://httpbin.org/delete/xml"; + val response2 = RestUtil.delete[PostErrR](url2, None); + response2 should be (null); + } it should "throw Exception if unable to parse the response during PATCH" in { val url = "https://httpbin.org/patch?type=test"; @@ -71,4 +104,15 @@ class TestRestUtil extends BaseSpec { response should be(null); } + it should "should test akka util" in { + val url = "https://httpbin.org/patch?type=test"; + implicit val system=ActorSystem("Test") + val request = HttpRequest(method = HttpMethods.POST, + uri = url, + entity = HttpEntity(ContentTypes.`application/json`, JSONUtils.serialize(Map("popularity" -> 1)))) + val response = AkkaHttpUtil.sendRequest(request) + response should not be (null); + system.terminate() + } + } \ No newline at end of file diff --git a/analytics-job-driver/pom.xml b/analytics-job-driver/pom.xml index a4db3ba0..7a83b08d 100644 --- a/analytics-job-driver/pom.xml +++ b/analytics-job-driver/pom.xml @@ -42,13 +42,23 @@ 
commons-codec commons-codec + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-client + 2.7.3 + provided + org.apache.spark spark-streaming-kafka_${scala.maj.version} - 1.6.2 + 1.6.3 org.apache.kafka @@ -64,19 +74,13 @@ org.scalatest scalatest_${scala.maj.version} - 2.2.4 + 3.0.5 test - - analytics-framework-1.0 + analytics-framework-2.0 src/main/scala src/test/scala diff --git a/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala b/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala index b4dd9ea4..564f66b3 100644 --- a/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala +++ b/analytics-job-driver/src/main/scala/org/ekstep/analytics/framework/driver/BatchJobDriver.scala @@ -36,7 +36,9 @@ object BatchJobDriver { } val autocloseSC = if (sc.isEmpty) true else false; val frameworkContext = if (fc.isEmpty) { - CommonUtil.getFrameworkContext(Option(Array((AppConf.getConfig("cloud_storage_type"), AppConf.getConfig("cloud_storage_type"), AppConf.getConfig("cloud_storage_type"))))); + val storageKey = config.modelParams.getOrElse(Map()).getOrElse("storageKeyConfig", "azure_storage_key").asInstanceOf[String] + val storageSecret = config.modelParams.getOrElse(Map()).getOrElse("storageSecretConfig", "azure_storage_secret").asInstanceOf[String] + CommonUtil.getFrameworkContext(Option(Array((AppConf.getConfig("cloud_storage_type"), storageKey, storageSecret)))); } else { fc.get } @@ -57,33 +59,43 @@ object BatchJobDriver { private def _process[T, R](config: JobConfig, models: List[IBatchModel[T, R]])(implicit mf: Manifest[T], mfr: Manifest[R], sc: SparkContext, fc: FrameworkContext) { - val rdd = DataFetcher.fetchBatchData[T](config.search).cache(); - val count = rdd.count; + fc.inputEventsCount = sc.longAccumulator("InputEventsCount"); + fc.outputEventsCount = sc.longAccumulator("OutputEventsCount"); + val rdd = DataFetcher.fetchBatchData[T](config.search); val data = DataFilter.filterAndSort[T](rdd, config.filters, config.sort); models.foreach { model => - JobContext.jobName = model.name // TODO: It is not necessary that the end date always exists. 
The below log statement might throw exceptions + // $COVERAGE-OFF$ + fc.outputEventsCount.reset(); val endDate = config.search.queries.getOrElse(Array(Query())).last.endDate - JobLogger.start("Started processing of " + model.name, Option(Map("config" -> config, "model" -> model.name, "date" -> endDate))); + // $COVERAGE-ON$ + val modelName = if(config.modelParams.nonEmpty && config.modelParams.get.get("modelName").nonEmpty) + config.modelParams.get.get("modelName").get.asInstanceOf[String] + else model.name + JobContext.jobName = modelName + JobLogger.start("Started processing of " + modelName, Option(Map("config" -> config, "model" -> model.name, "date" -> endDate))); try { val result = _processModel(config, data, model); // generate metric event and push it to kafka topic - val date = if (endDate.isEmpty) new DateTime().toString(CommonUtil.dateFormat) else endDate.get - val metrics = List(V3MetricEdata("date", date.asInstanceOf[AnyRef]), V3MetricEdata("inputEvents", count.asInstanceOf[AnyRef]), - V3MetricEdata("outputEvents", result._2.asInstanceOf[AnyRef]), V3MetricEdata("timeTakenSecs", Double.box(result._1 / 1000).asInstanceOf[AnyRef])) - val metricEvent = CommonUtil.getMetricEvent(Map("system" -> "DataProduct", "subsystem" -> model.name, "metrics" -> metrics), AppConf.getConfig("metric.producer.id"), AppConf.getConfig("metric.producer.pid")) + val metrics = List(Map("id" -> "input-events", "value" -> fc.inputEventsCount.value.asInstanceOf[AnyRef]), Map("id" -> "output-events", "value" -> result._2.asInstanceOf[AnyRef]), Map("id" -> "time-taken-secs", "value" -> Double.box(result._1 / 1000).asInstanceOf[AnyRef])) + val metricEvent = getMetricJson(model.name, endDate, "SUCCESS", metrics) + // $COVERAGE-OFF$ if (AppConf.getConfig("push.metrics.kafka").toBoolean) - KafkaDispatcher.dispatch(Array(JSONUtils.serialize(metricEvent)), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker"))) + KafkaDispatcher.dispatch(Array(metricEvent), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker"))) + // $COVERAGE-ON$ - JobLogger.end(model.name + " processing complete", "SUCCESS", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> count, "outputEvents" -> result._2, "timeTaken" -> Double.box(result._1 / 1000)))); + JobLogger.end(modelName + " processing complete", "SUCCESS", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> fc.inputEventsCount.value, "outputEvents" -> result._2, "timeTaken" -> Double.box(result._1 / 1000)))); } catch { case ex: Exception => JobLogger.log(ex.getMessage, None, ERROR); - JobLogger.end(model.name + " processing failed", "FAILED", Option(Map("model" -> model.name, "date" -> endDate, "inputEvents" -> count, "statusMsg" -> ex.getMessage))); + JobLogger.end(modelName + " processing failed", "FAILED", Option(Map("model" -> model.name, "date" -> endDate, "statusMsg" -> ex.getMessage))); + val metricEvent = getMetricJson(model.name, endDate, "FAILED", List(Map("id" -> "input-events", "value" -> fc.inputEventsCount.value.asInstanceOf[AnyRef]))) + // $COVERAGE-OFF$ + if (AppConf.getConfig("push.metrics.kafka").toBoolean) + KafkaDispatcher.dispatch(Array(metricEvent), Map("topic" -> AppConf.getConfig("metric.kafka.topic"), "brokerList" -> AppConf.getConfig("metric.kafka.broker"))) + // $COVERAGE-ON$ ex.printStackTrace(); - } finally { - rdd.unpersist() } } } @@ -92,11 +104,18 @@ object BatchJobDriver { CommonUtil.time({ val 
output = model.execute(data, config.modelParams); - // JobContext.recordRDD(output); val count = OutputDispatcher.dispatch(config.output, output); - // JobContext.cleanUpRDDs(); - count; + fc.outputEventsCount.value }) } + + def getMetricJson(subsystem: String, endDate: Option[String], status: String, metrics: List[Map[String, AnyRef]]): String = { + // $COVERAGE-OFF$ + val date = if (endDate.isEmpty) new DateTime().toString(CommonUtil.dateFormat) else endDate.get + // $COVERAGE-ON$ + val dims = List(Map("id" -> "report-date", "value" -> date), Map("id" -> "status", "value" -> status)) + val metricEvent = Map("metricts" -> System.currentTimeMillis(), "system" -> "DataProduct", "subsystem" -> subsystem, "metrics" -> metrics, "dimensions" -> dims) + JSONUtils.serialize(metricEvent) + } } \ No newline at end of file diff --git a/analytics-job-driver/src/test/resources/application.conf b/analytics-job-driver/src/test/resources/application.conf index 759fae4b..41ac850e 100644 --- a/analytics-job-driver/src/test/resources/application.conf +++ b/analytics-job-driver/src/test/resources/application.conf @@ -19,4 +19,6 @@ metric.producer.id="pipeline.monitoring" metric.producer.pid="dataproduct.metrics" push.metrics.kafka=false metric.kafka.broker="localhost:9092" -metric.kafka.topic="metric" \ No newline at end of file +metric.kafka.topic="metric" + +cloud_storage_type=azure \ No newline at end of file diff --git a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala index 3f5e633e..fbc9d819 100644 --- a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala +++ b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver.scala @@ -24,7 +24,7 @@ object TestModel2 extends IBatchModel[MeasuredEvent, String] with Serializable { object TestModel3 extends IBatchModel[MeasuredEvent, String] with Serializable { - def execute(events: RDD[MeasuredEvent], jobParams: Option[Map[String, AnyRef]])(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = { + def execute(events: RDD[MeasuredEvent], jobParams: Option[Map[String, AnyRef]])(implicit sc: SparkContext, fc: FrameworkContext): RDD[String] = { val contents = events.map { x => x.content_id.getOrElse("") } contents; } @@ -52,7 +52,7 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll { Option(Array[Filter](Filter("eventId", "IN", Option(Array("OE_ASSESS", "OE_START", "OE_END", "OE_LEVEL_SET"))))), None, "org.ekstep.analytics.framework.TestModel", - Option(Map()), + Option(Map("modelName" -> "TestModelJob")), Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))), Option(8), None, @@ -176,6 +176,7 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll { JobDriver.run[MeasuredEvent, String]("batch", JSONUtils.serialize(jobConfig), models, "TestMergeJobs"); CommonUtil.closeSparkContext()(sc.get); } + } it should "run the stream job driver on multiple models" in { @@ -198,4 +199,5 @@ class TestJobDriver extends FlatSpec with Matchers with BeforeAndAfterAll { JobDriver.run("batch", JSONUtils.serialize(""), models, "TestMergeJobs"); } } -} \ No newline at end of file +} + diff --git a/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala new file mode 100644 index 00000000..97c7b546 
--- /dev/null +++ b/analytics-job-driver/src/test/scala/org/ekstep/analytics/framework/TestJobDriver2.scala @@ -0,0 +1,49 @@ +package org.ekstep.analytics.framework + +import org.scalatest.FlatSpec +import org.scalatest.Matchers +import org.scalatest.BeforeAndAfterAll +import org.ekstep.analytics.framework.util.CommonUtil +import org.ekstep.analytics.framework.util.JSONUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext + +case class Dummy(event:String) extends AlgoInput with AlgoOutput with Output +object TestModel4 extends IBatchModelTemplate[Event, Dummy, Dummy, Dummy] with Serializable { + + override def preProcess(events: RDD[Event], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = { + events.map { x => Dummy(JSONUtils.serialize(x)) }; + } + + override def algorithm(events: RDD[Dummy], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = { + events + } + + override def postProcess(events: RDD[Dummy], config: Map[String, AnyRef])(implicit sc: SparkContext, fc: FrameworkContext): RDD[Dummy] = { + events + } + + override def name: String = "TestModel4"; +} + +class TestJobDriver2 extends FlatSpec with Matchers with BeforeAndAfterAll { + + it should "run the batch job driver on model implementing BatchModelTemplate" in { + + val jobConfig = JobConfig( + Fetcher("local", None, Option(Array(Query(None, None, None, None, None, None, None, None, None, Option("src/test/resources/sample_telemetry.log"))))), + Option(Array[Filter](Filter("eventId", "EQ", Option("OE_START")))), + None, + "org.ekstep.analytics.framework.TestModel2", + Option(Map()), + Option(Array(Dispatcher("console", Map("printEvent" -> false.asInstanceOf[AnyRef])))), + Option(8), + None, + None) + + implicit val sc = Option(CommonUtil.getSparkContext(1, "Test")); + implicit val fc:Option[FrameworkContext] = None; + JobDriver.run("batch", JSONUtils.serialize(jobConfig), TestModel4); + CommonUtil.closeSparkContext()(sc.get); + } +} \ No newline at end of file diff --git a/auto_build_deploy b/auto_build_deploy new file mode 100644 index 00000000..d9415a7d --- /dev/null +++ b/auto_build_deploy @@ -0,0 +1,53 @@ +@Library('deploy-conf') _ +node('build-slave') { + try { + String ANSI_GREEN = "\u001B[32m" + String ANSI_NORMAL = "\u001B[0m" + String ANSI_BOLD = "\u001B[1m" + String ANSI_RED = "\u001B[31m" + String ANSI_YELLOW = "\u001B[33m" + ansiColor('xterm') { + stage('Checkout') { + tag_name = env.JOB_NAME.split("/")[-1] + if (!tag_name.contains(env.public_repo_branch)) { + println("Error.. Tag does not contain " + env.public_repo_branch) + error("Oh ho! Tag is not a release candidate.. 
Skipping build") + } + cleanWs() + def scmVars = checkout scm + checkout scm: [$class: 'GitSCM', branches: [[name: "refs/tags/$tag_name"]], userRemoteConfigs: [[url: scmVars.GIT_URL]]] + commit_hash = sh(script: 'git rev-parse --short HEAD', returnStdout: true).trim() + artifact_version = tag_name + "_" + commit_hash + echo "artifact_version: "+ artifact_version + } + } + + stage('Build') { + sh ''' + mvn clean install -DskipTests + ''' + } + stage('Archive artifacts'){ + sh """ + mkdir lpa_core_artifacts + cp analytics-job-driver/target/analytics-framework-2.0.jar lpa_core_artifacts + cp analytics-core/lib/scruid*.jar lpa_core_artifacts + zip -j lpa_core_artifacts.zip:${artifact_version} lpa_core_artifacts/* + """ + archiveArtifacts artifacts: "lpa_core_artifacts.zip:${artifact_version}", fingerprint: true, onlyIfSuccessful: true + sh """echo {\\"artifact_name\\" : \\"lpa_core_artifacts.zip\\", \\"artifact_version\\" : \\"${artifact_version}\\", \\"node_name\\" : \\"${env.NODE_NAME}\\"} > metadata.json""" + archiveArtifacts artifacts: 'metadata.json', onlyIfSuccessful: true + currentBuild.description = artifact_version + } + currentBuild.result = "SUCCESS" + slack_notify(currentBuild.result, tag_name) + email_notify() + auto_build_deploy() + } + catch (err) { + currentBuild.result = "FAILURE" + slack_notify(currentBuild.result, tag_name) + email_notify() + throw err + } +} diff --git a/pom.xml b/pom.xml index df744762..2b633ea0 100644 --- a/pom.xml +++ b/pom.xml @@ -14,8 +14,8 @@ 1.1.1 2.11 2.11.11 - 2.0 - 2.0.1 + 2.4 + 2.4.4 @@ -30,6 +30,15 @@ + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 8 + 8 + + maven-assembly-plugin 2.3