Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions src/backend_api/app/plotting/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
A potential way to do the data aggregation in pure SQL. Not complete yet.

```sql
-- "baseQuery": items from one league that carry one specific modifier, each
-- paired with the currency row it references. The league name and modifier id
-- are hard-coded literals here.
WITH "baseQuery" AS
(
    SELECT
        item."itemId" AS "itemId",
        item."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        item.league AS league,
        item."itemBaseTypeId" AS "itemBaseTypeId",
        item."currencyId" AS "currencyId",
        item."currencyAmount" AS "currencyAmount",
        currency."tradeName" AS "tradeName",
        currency."valueInChaos" AS "valueInChaos",
        currency."createdHoursSinceLaunch" AS "currencyCreatedHoursSinceLaunch"
    FROM item
    INNER JOIN currency
        ON item."currencyId" = currency."currencyId"
    WHERE item.league = 'Mercenaries'
        -- AND (item."createdHoursSinceLaunch" >3000)
        AND EXISTS (
            SELECT 1
            FROM item_modifier
            WHERE item."itemId" = item_modifier."itemId"
                AND item_modifier."modifierId" = 2
        )
),
-- "mostCommon": the single (league, tradeName) group with the highest row
-- count in "baseQuery".
-- NOTE(review): there is no tie-breaker, so with equal counts the chosen
-- trade name is arbitrary.
"mostCommon" AS
(
    SELECT
        "baseQuery"."tradeName" AS "mostCommonTradeName",
        count("baseQuery"."tradeName") AS "nameCount"
    FROM "baseQuery"
    GROUP BY
        "baseQuery".league,
        "baseQuery"."tradeName"
    ORDER BY "nameCount" DESC
    LIMIT 1
),
-- "mostCommonIds": per hour, the highest currencyId among the rows whose
-- trade name equals the most common trade name.
"mostCommonIds" AS
(
    SELECT
        "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        max("baseQuery"."currencyId") AS "mostCommonCurrencyId"
    FROM "baseQuery"
    WHERE "baseQuery"."tradeName" = (
        SELECT "mostCommon"."mostCommonTradeName"
        FROM "mostCommon"
    )
    GROUP BY "baseQuery"."createdHoursSinceLaunch"
),
-- "mostCommonPrices": per hour, the chaos value and trade name of the
-- currency row selected in "mostCommonIds".
"mostCommonPrices" AS
(
    SELECT
        "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        min("baseQuery"."valueInChaos") AS "mostCommonValueInChaos",
        min("baseQuery"."tradeName") AS "mostCommonCurrencyUsed"
    FROM "baseQuery"
    INNER JOIN "mostCommonIds"
        ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonIds"."createdHoursSinceLaunch"
        AND "baseQuery"."currencyId" = "mostCommonIds"."mostCommonCurrencyId"
    GROUP BY "baseQuery"."createdHoursSinceLaunch"
),
-- prices: every "baseQuery" row priced twice -- in chaos
-- (currencyAmount * valueInChaos) and in units of that hour's most common
-- currency (the chaos price divided by that currency's chaos value).
prices AS
(
    SELECT
        "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        "baseQuery".league AS league,
        "baseQuery"."currencyAmount" * "baseQuery"."valueInChaos" AS "valueInChaos",
        ("baseQuery"."currencyAmount" * "baseQuery"."valueInChaos")
            / CAST("mostCommonPrices"."mostCommonValueInChaos" AS FLOAT(4))
            AS "valueInMostCommonCurrencyUsed",
        "mostCommonPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed"
    FROM "baseQuery"
    INNER JOIN "mostCommonPrices"
        ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonPrices"."createdHoursSinceLaunch"
),
-- "rankedPrices": adds pos, the rank of each price within its hour, cheapest
-- first. rank() gives tied prices the same pos.
"rankedPrices" AS
(
    SELECT
        prices."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        prices.league AS league,
        prices."valueInChaos" AS "valueInChaos",
        prices."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed",
        prices."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed",
        rank() OVER (
            PARTITION BY prices."createdHoursSinceLaunch"
            ORDER BY prices."valueInChaos" ASC
        ) AS pos
    FROM prices
),
-- "filteredPrices": keeps rows with pos <= 20 per hour and labels each row
-- 'low' (pos < 10), 'medium' (pos < 15) or 'high' (everything else kept).
"filteredPrices" AS
(
    SELECT
        "rankedPrices"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch",
        "rankedPrices".league AS league,
        "rankedPrices"."valueInChaos" AS "valueInChaos",
        "rankedPrices"."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed",
        "rankedPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed",
        CASE
            WHEN ("rankedPrices".pos < 10) THEN 'low'
            WHEN ("rankedPrices".pos < 15) THEN 'medium'
            ELSE 'high'
        END AS confidence
    FROM "rankedPrices"
    WHERE "rankedPrices".pos <= 20
    ORDER BY "rankedPrices"."createdHoursSinceLaunch"
),
-- "jsonReady": one row per (hour, league) with the average of the kept prices.
-- NOTE(review): min() over the text labels returns the label that sorts
-- first as a string, which is not necessarily the weakest confidence --
-- confirm this is intended.
"jsonReady" AS
(
    SELECT
        "filteredPrices"."createdHoursSinceLaunch" AS "hoursSinceLaunch",
        "filteredPrices".league,
        avg("filteredPrices"."valueInChaos") AS "valueInChaos",
        avg("filteredPrices"."valueInMostCommonCurrencyUsed") AS "valueInMostCommonCurrencyUsed",
        min("filteredPrices"."mostCommonCurrencyUsed") AS "mostCommonCurrencyUsed",
        min("filteredPrices".confidence) AS confidence
    FROM "filteredPrices"
    GROUP BY
        "filteredPrices"."createdHoursSinceLaunch",
        "filteredPrices".league
    ORDER BY "filteredPrices"."createdHoursSinceLaunch"
),
-- "overallConfidence": one row per (league, confidence label), ranked by how
-- many hourly rows carry that label.
-- NOTE(review): the rank is ascending by count (rank 1 = least frequent
-- label), the column is left unnamed, and no later step of this query
-- filters on it.
"overallConfidence" AS
(
    SELECT
        "jsonReady".league AS "name",
        "jsonReady".confidence AS "confidenceRating",
        RANK() OVER (
            PARTITION BY "jsonReady".league
            ORDER BY COUNT("jsonReady".confidence) ASC
        )
    FROM "jsonReady"
    GROUP BY
        "jsonReady".league,
        "jsonReady".confidence
),
-- "overallMostCommonCurrencyUsed": one row per (league, currency name),
-- ranked the same way as in "overallConfidence" (ascending by count, rank
-- column unnamed and not filtered on later).
"overallMostCommonCurrencyUsed" AS
(
    SELECT
        "jsonReady".league AS "name",
        "jsonReady"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed",
        RANK() OVER (
            PARTITION BY "jsonReady".league
            ORDER BY COUNT("jsonReady"."mostCommonCurrencyUsed") ASC
        )
    FROM "jsonReady"
    GROUP BY
        "jsonReady".league,
        "jsonReady"."mostCommonCurrencyUsed"
),
-- "timeSeriesData": the hourly rows of each league collected into a JSON
-- array, together with a confidence rating for the league.
-- NOTE(review): the join is on league name only, so a league with several
-- distinct confidence labels yields one output row per label, each carrying
-- the same array.
"timeSeriesData" AS
(
    SELECT
        "jsonReady".league AS "name",
        json_agg(
            json_build_object(
                'hoursSinceLaunch', "jsonReady"."hoursSinceLaunch",
                'valueInChaos', "jsonReady"."valueInChaos",
                'valueInMostCommonCurrencyUsed', "jsonReady"."valueInMostCommonCurrencyUsed",
                'confidence', "jsonReady"."confidence"
            )
        ) AS "data",
        "overallConfidence"."confidenceRating"
    FROM "jsonReady"
    INNER JOIN "overallConfidence"
        ON "jsonReady".league = "overallConfidence".name
    GROUP BY
        "jsonReady".league,
        "overallConfidence"."confidenceRating"
)



-- Final result: a single JSON object holding the most common currency name
-- and an array with one entry per "timeSeriesData" row
-- (name, data, confidenceRating).
-- The join is written out explicitly instead of NATURAL JOIN: "name" is the
-- only column the two CTEs share today, and an explicit condition keeps the
-- join key stable if either CTE later gains another same-named column.
SELECT
    json_build_object(
        'mostCommonCurrencyUsed', MIN("overallMostCommonCurrencyUsed"."mostCommonCurrencyUsed"),
        'data', json_agg(
            json_build_object(
                'name', "timeSeriesData".name,
                'data', "timeSeriesData".data,
                'confidenceRating', "timeSeriesData"."confidenceRating"
            )
        )
    )
FROM "timeSeriesData"
INNER JOIN "overallMostCommonCurrencyUsed"
    ON "timeSeriesData".name = "overallMostCommonCurrencyUsed".name

-- SELECT *, EXP(SUM(LN(multi)) OVER (PARTITION BY league, "createdHoursSinceLaunch", "clusterId" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING)) AS "cumMulti"
-- FROM

-- (
-- SELECT *, SUM(is_new_cluster) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING) AS "clusterId"
-- FROM
-- (SELECT
-- *,
-- "valueInChaos" / "valueInChaosPrev" AS multi,
-- "valueInChaos" / "valueInChaos2Prev" AS multi2,
-- CASE
-- WHEN (("valueInChaos" / "valueInChaosPrev") > 1.05) OR (("valueInChaos" / "valueInChaos2Prev") > 1.1)
-- THEN 1
-- ELSE
-- 0
-- END AS is_new_cluster
-- FROM(
-- SELECT
-- *,
-- LAG("valueInChaos", CEIL("nPoints"*0.05)::INT, null) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaosPrev",
-- LAG(
-- "valueInChaos",
-- CEIL("nPoints"*0.1)::INT,
-- null
-- ) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaos2Prev"
-- -- FROM prices
-- FROM
-- (
-- SELECT *,
-- COUNT(*) OVER (PARTITION BY league, "createdHoursSinceLaunch") AS "nPoints"
-- FROM prices
-- )
-- WHERE TRUE
-- -- WHERE league = 'Phrecia'
-- AND "createdHoursSinceLaunch" = 2658
-- ORDER BY "createdHoursSinceLaunch"
-- )))
```
86 changes: 75 additions & 11 deletions src/backend_api/app/plotting/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,10 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select:
.cte("filteredPrices")
)

final_query = (
json_ready = (
select(
filtered_prices.c.createdHoursSinceLaunch.label("hoursSinceLaunch"),
filtered_prices.c.league,
filtered_prices.c.league.label("name"),
func.avg(filtered_prices.c.valueInChaos).label("valueInChaos"),
func.avg(filtered_prices.c.valueInMostCommonCurrencyUsed).label(
"valueInMostCommonCurrencyUsed"
Expand All @@ -556,22 +556,85 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select:
filtered_prices.c.createdHoursSinceLaunch, filtered_prices.c.league
)
.order_by(filtered_prices.c.createdHoursSinceLaunch)
).cte("jsonReady")

overall_confidence = (
select(
json_ready.c.name,
json_ready.c.confidence.label("confidenceRating"),
func.rank().over(
partition_by=json_ready.c.name,
order_by=func.count(json_ready.c.confidence),
),
)
.group_by(json_ready.c.name, json_ready.c.confidence)
.cte("overallConfidence")
)

overall_most_common_currency_used = (
select(
json_ready.c.name,
json_ready.c.mostCommonCurrencyUsed.label("mostCommonCurrencyUsed"),
func.rank().over(
partition_by=json_ready.c.name,
order_by=func.count(json_ready.c.mostCommonCurrencyUsed),
),
)
.group_by(json_ready.c.name, json_ready.c.mostCommonCurrencyUsed)
.cte("overallMostCommonCurrencyUsed")
)

time_series_data = (
select(
json_ready.c.name,
func.json_agg(
func.json_build_object(
"hoursSinceLaunch",
json_ready.c.hoursSinceLaunch,
"valueInChaos",
json_ready.c.valueInChaos,
"valueInMostCommonCurrencyUsed",
json_ready.c.valueInMostCommonCurrencyUsed,
"confidence",
json_ready.c.confidence,
)
).label("data"),
overall_confidence.c.confidenceRating,
)
.select_from(json_ready)
.join(
overall_confidence,
json_ready.c.name == overall_confidence.c.name,
)
.join(
overall_most_common_currency_used,
json_ready.c.name == overall_most_common_currency_used.c.name,
)
.group_by(json_ready.c.name, overall_confidence.c.confidenceRating)
).cte("timeSeriesData")

final_query = select(
func.min(overall_most_common_currency_used.c.mostCommonCurrencyUsed).label(
"mostCommonCurrencyUsed"
),
func.json_agg(
func.json_build_object(
"name",
time_series_data.c.name,
"data",
time_series_data.c.data,
"confidenceRating",
time_series_data.c.confidenceRating,
)
).label("data"),
)

return final_query

async def _plot_execute(self, db: AsyncSession, *, statement: Select) -> PlotData:
result = await self._perform_plot_db_statement(db, statement=statement)
df = self._convert_result_to_df(result)

if df.empty:
raise PlotQueryDataNotFoundError(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should verify that something is returned

query_data=str(statement),
function_name=self.plot.__name__,
class_name=self.__class__.__name__,
)

return self._create_plot_data(df)
return result.first()

def _convert_plot_query_type(self, query: PlotQuery) -> IdentifiedPlotQuery:
query_dump = query.model_dump()
Expand All @@ -588,6 +651,7 @@ async def plot(self, db: AsyncSession, *, query: PlotQuery) -> PlotData:
# Logs statement in nice format
log_clause = stmt.compile(engine, compile_kwargs={"literal_binds": True})
plot_logger.info(f"{log_clause}")

return await self._plot_execute(db, statement=stmt)


Expand Down
Loading