diff --git a/src/main/scala/io/opentargets/etl/backend/Search.scala b/src/main/scala/io/opentargets/etl/backend/Search.scala index acbff024..154821ec 100644 --- a/src/main/scala/io/opentargets/etl/backend/Search.scala +++ b/src/main/scala/io/opentargets/etl/backend/Search.scala @@ -782,9 +782,10 @@ object Search extends LazyLogging { // TODO check the overall score column name logger.info("subselect indirect LLR associations just id and score and persist") + val associationScores = inputDataFrame("association").data .withColumn("associationId", concat_ws("-", col("diseaseId"), col("targetId"))) - .withColumnRenamed("overallDatasourceHarmonicScore", "score") + .withColumnRenamed("associationScore", "score") .select(associationColumns.head, associationColumns.tail: _*) .persist(StorageLevel.DISK_ONLY) diff --git a/src/main/scala/io/opentargets/etl/backend/SearchEBI.scala b/src/main/scala/io/opentargets/etl/backend/SearchEBI.scala index 5950ec69..6b04e779 100644 --- a/src/main/scala/io/opentargets/etl/backend/SearchEBI.scala +++ b/src/main/scala/io/opentargets/etl/backend/SearchEBI.scala @@ -4,6 +4,7 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql._ import io.opentargets.etl.backend.spark.{IOResource, IoHelpers} import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.col object SearchEBI extends LazyLogging { @@ -17,13 +18,24 @@ object SearchEBI extends LazyLogging { val datasetAssociations = associationsDirectOverall .join(targets, Seq("targetId"), "inner") .join(diseases, Seq("diseaseId"), "inner") - .select("targetId", "diseaseId", "approvedSymbol", "name", "score") + .select( + col("targetId"), + col("diseaseId"), + col("approvedSymbol"), + col("name"), + col("associationScore").alias("score") + ) val datasetEvidence = evidence .join(targets, Seq("targetId"), "inner") .join(diseases, Seq("diseaseId"), "inner") - .select("targetId", "diseaseId", "approvedSymbol", "name", "score") - + .select( + col("targetId"), + col("diseaseId"), + col("approvedSymbol"), + col("name"), + col("associationScore").alias("score") + ) Map( "ebisearchAssociations" -> datasetAssociations, "ebisearchEvidence" -> datasetEvidence