diff --git a/src/backend_api/app/plotting/README.md b/src/backend_api/app/plotting/README.md new file mode 100644 index 00000000..404eee7d --- /dev/null +++ b/src/backend_api/app/plotting/README.md @@ -0,0 +1,169 @@ +A potential way to do the data aggregation in pure SQL. Not complete yet. + +```sql +WITH "baseQuery" AS +( + SELECT item."itemId" AS "itemId", item."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", item.league AS league, item."itemBaseTypeId" AS "itemBaseTypeId", item."currencyId" AS "currencyId", item."currencyAmount" AS "currencyAmount", currency."tradeName" AS "tradeName", currency."valueInChaos" AS "valueInChaos", currency."createdHoursSinceLaunch" AS "currencyCreatedHoursSinceLaunch" + FROM item JOIN currency ON item."currencyId" = currency."currencyId" + WHERE (item.league = 'Mercenaries') + -- AND (item."createdHoursSinceLaunch" >3000) + AND ( + EXISTS ( + SELECT 1 + FROM item_modifier + WHERE item."itemId" = item_modifier."itemId" AND item_modifier."modifierId" = 2 + ) + ) AND true +), +"mostCommon" AS + ( + SELECT "baseQuery"."tradeName" AS "mostCommonTradeName", count("baseQuery"."tradeName") AS "nameCount" + FROM "baseQuery" GROUP BY "baseQuery".league, "baseQuery"."tradeName" ORDER BY "nameCount" DESC + LIMIT 1 + ), +"mostCommonIds" AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", max("baseQuery"."currencyId") AS "mostCommonCurrencyId" + FROM "baseQuery" + WHERE "baseQuery"."tradeName" = (SELECT "mostCommon"."mostCommonTradeName" FROM "mostCommon") + GROUP BY "baseQuery"."createdHoursSinceLaunch" +), +"mostCommonPrices" AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", min("baseQuery"."valueInChaos") AS "mostCommonValueInChaos", min("baseQuery"."tradeName") AS "mostCommonCurrencyUsed" + FROM "baseQuery" JOIN "mostCommonIds" ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonIds"."createdHoursSinceLaunch" AND "baseQuery"."currencyId" = 
"mostCommonIds"."mostCommonCurrencyId" + GROUP BY "baseQuery"."createdHoursSinceLaunch" +), +prices AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", "baseQuery".league AS league, "baseQuery"."currencyAmount" * "baseQuery"."valueInChaos" AS "valueInChaos", ("baseQuery"."currencyAmount" * "baseQuery"."valueInChaos") / CAST("mostCommonPrices"."mostCommonValueInChaos" AS FLOAT(4)) AS "valueInMostCommonCurrencyUsed", "mostCommonPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed" + FROM "baseQuery" JOIN "mostCommonPrices" ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonPrices"."createdHoursSinceLaunch" +), +"rankedPrices" AS +( + SELECT + prices."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", + prices.league AS league, + prices."valueInChaos" AS "valueInChaos", + prices."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed", + prices."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", + rank() OVER (PARTITION BY prices."createdHoursSinceLaunch" ORDER BY prices."valueInChaos" ASC) AS pos + FROM prices +), +"filteredPrices" AS +( + SELECT "rankedPrices"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", "rankedPrices".league AS league, "rankedPrices"."valueInChaos" AS "valueInChaos", "rankedPrices"."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed", "rankedPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", CASE WHEN ("rankedPrices".pos < 10) THEN 'low' WHEN ("rankedPrices".pos < 15) THEN 'medium' ELSE 'high' END AS confidence + FROM "rankedPrices" + WHERE "rankedPrices".pos <= 20 + ORDER BY "rankedPrices"."createdHoursSinceLaunch" +), +"jsonReady" AS +( + SELECT + "filteredPrices"."createdHoursSinceLaunch" AS "hoursSinceLaunch", + "filteredPrices".league, + avg("filteredPrices"."valueInChaos") AS "valueInChaos", + avg("filteredPrices"."valueInMostCommonCurrencyUsed") AS "valueInMostCommonCurrencyUsed", + min("filteredPrices"."mostCommonCurrencyUsed") AS 
"mostCommonCurrencyUsed", + min("filteredPrices".confidence) AS confidence + FROM "filteredPrices" + GROUP BY "filteredPrices"."createdHoursSinceLaunch", "filteredPrices".league + ORDER BY "filteredPrices"."createdHoursSinceLaunch" +), +"overallConfidence" AS +( + SELECT + "jsonReady".league AS "name", + "jsonReady".confidence AS "confidenceRating", + RANK() OVER ( + PARTITION BY "jsonReady".league + ORDER BY COUNT("jsonReady".confidence) + ASC + ) + FROM "jsonReady" + GROUP BY + "name", + "confidenceRating" +), +"overallMostCommonCurrencyUsed" AS ( + SELECT + "jsonReady".league AS "name", + "jsonReady"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", + RANK() OVER ( + PARTITION BY "jsonReady".league + ORDER BY COUNT("jsonReady"."mostCommonCurrencyUsed") + ASC + ) + FROM "jsonReady" + GROUP BY + "name", + "mostCommonCurrencyUsed" +), "timeSeriesData" AS ( + SELECT + "jsonReady".league AS "name", + json_agg( + json_build_object( + 'hoursSinceLaunch', "jsonReady"."hoursSinceLaunch", + 'valueInChaos', "jsonReady"."valueInChaos", + 'valueInMostCommonCurrencyUsed', "jsonReady"."valueInMostCommonCurrencyUsed", + 'confidence', "jsonReady"."confidence" + ) + ) AS "data", + "overallConfidence"."confidenceRating" + FROM "jsonReady" JOIN "overallConfidence" ON "jsonReady".league = "overallConfidence".name + GROUP BY "jsonReady".league, "overallConfidence"."confidenceRating" +) + + + +SELECT + json_build_object( + 'mostCommonCurrencyUsed', MIN("overallMostCommonCurrencyUsed"."mostCommonCurrencyUsed"), + 'data', json_agg( + json_build_object( + 'name', "timeSeriesData".name, + 'data', "timeSeriesData".data, + 'confidenceRating', "timeSeriesData"."confidenceRating" + ) + ) + ) +FROM "timeSeriesData" NATURAL JOIN "overallMostCommonCurrencyUsed" + +-- SELECT *, EXP(SUM(LN(multi)) OVER (PARTITION BY league, "createdHoursSinceLaunch", "clusterId" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING)) AS "cumMulti" +-- FROM + +-- ( +-- SELECT *, SUM(is_new_cluster) OVER (PARTITION 
BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING) AS "clusterId" +-- FROM +-- (SELECT +-- *, +-- "valueInChaos" / "valueInChaosPrev" AS multi, +-- "valueInChaos" / "valueInChaos2Prev" AS multi2, +-- CASE +-- WHEN (("valueInChaos" / "valueInChaosPrev") > 1.05) OR (("valueInChaos" / "valueInChaos2Prev") > 1.1) +-- THEN 1 +-- ELSE +-- 0 +-- END AS is_new_cluster +-- FROM( +-- SELECT +-- *, +-- LAG("valueInChaos", CEIL("nPoints"*0.05)::INT, null) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaosPrev", +-- LAG( +-- "valueInChaos", +-- CEIL("nPoints"*0.1)::INT, +-- null +-- ) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaos2Prev" +-- -- FROM prices +-- FROM +-- ( +-- SELECT *, +-- COUNT(*) OVER (PARTITION BY league, "createdHoursSinceLaunch") AS "nPoints" +-- FROM prices +-- ) +-- WHERE TRUE +-- -- WHERE league = 'Phrecia' +-- AND "createdHoursSinceLaunch" = 2658 +-- ORDER BY "createdHoursSinceLaunch" +-- ))) +``` diff --git a/src/backend_api/app/plotting/plotter.py b/src/backend_api/app/plotting/plotter.py index 29b101d5..87be50ce 100644 --- a/src/backend_api/app/plotting/plotter.py +++ b/src/backend_api/app/plotting/plotter.py @@ -539,10 +539,10 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select: .cte("filteredPrices") ) - final_query = ( + json_ready = ( select( filtered_prices.c.createdHoursSinceLaunch.label("hoursSinceLaunch"), - filtered_prices.c.league, + filtered_prices.c.league.label("name"), func.avg(filtered_prices.c.valueInChaos).label("valueInChaos"), func.avg(filtered_prices.c.valueInMostCommonCurrencyUsed).label( "valueInMostCommonCurrencyUsed" @@ -556,22 +556,85 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select: filtered_prices.c.createdHoursSinceLaunch, filtered_prices.c.league ) .order_by(filtered_prices.c.createdHoursSinceLaunch) + ).cte("jsonReady") + + overall_confidence = ( + 
select( + json_ready.c.name, + json_ready.c.confidence.label("confidenceRating"), + func.rank().over( + partition_by=json_ready.c.name, + order_by=func.count(json_ready.c.confidence), + ), + ) + .group_by(json_ready.c.name, json_ready.c.confidence) + .cte("overallConfidence") + ) + + overall_most_common_currency_used = ( + select( + json_ready.c.name, + json_ready.c.mostCommonCurrencyUsed.label("mostCommonCurrencyUsed"), + func.rank().over( + partition_by=json_ready.c.name, + order_by=func.count(json_ready.c.mostCommonCurrencyUsed), + ), + ) + .group_by(json_ready.c.name, json_ready.c.mostCommonCurrencyUsed) + .cte("overallMostCommonCurrencyUsed") + ) + + time_series_data = ( + select( + json_ready.c.name, + func.json_agg( + func.json_build_object( + "hoursSinceLaunch", + json_ready.c.hoursSinceLaunch, + "valueInChaos", + json_ready.c.valueInChaos, + "valueInMostCommonCurrencyUsed", + json_ready.c.valueInMostCommonCurrencyUsed, + "confidence", + json_ready.c.confidence, + ) + ).label("data"), + overall_confidence.c.confidenceRating, + ) + .select_from(json_ready) + .join( + overall_confidence, + json_ready.c.name == overall_confidence.c.name, + ) + .join( + overall_most_common_currency_used, + json_ready.c.name == overall_most_common_currency_used.c.name, + ) + .group_by(json_ready.c.name, overall_confidence.c.confidenceRating) + ).cte("timeSeriesData") + + final_query = select( + func.min(overall_most_common_currency_used.c.mostCommonCurrencyUsed).label( + "mostCommonCurrencyUsed" + ), + func.json_agg( + func.json_build_object( + "name", + time_series_data.c.name, + "data", + time_series_data.c.data, + "confidenceRating", + time_series_data.c.confidenceRating, + ) + ).label("data"), ) return final_query async def _plot_execute(self, db: AsyncSession, *, statement: Select) -> PlotData: result = await self._perform_plot_db_statement(db, statement=statement) - df = self._convert_result_to_df(result) - if df.empty: - raise PlotQueryDataNotFoundError( - 
query_data=str(statement), - function_name=self.plot.__name__, - class_name=self.__class__.__name__, - ) - - return self._create_plot_data(df) + return result.first() def _convert_plot_query_type(self, query: PlotQuery) -> IdentifiedPlotQuery: query_dump = query.model_dump() @@ -588,6 +651,7 @@ async def plot(self, db: AsyncSession, *, query: PlotQuery) -> PlotData: # Logs statement in nice format log_clause = stmt.compile(engine, compile_kwargs={"literal_binds": True}) plot_logger.info(f"{log_clause}") + return await self._plot_execute(db, statement=stmt)