diff --git a/src/backend_api/app/alembic/versions/cc39d4eb113b_added_gameitemid_to_identified_items.py b/src/backend_api/app/alembic/versions/cc39d4eb113b_added_gameitemid_to_identified_items.py new file mode 100644 index 00000000..dc082e99 --- /dev/null +++ b/src/backend_api/app/alembic/versions/cc39d4eb113b_added_gameitemid_to_identified_items.py @@ -0,0 +1,31 @@ +"""Added gameItemId to identified items + +Revision ID: cc39d4eb113b +Revises: e38727349f3f +Create Date: 2025-10-25 22:26:59.343322 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "cc39d4eb113b" +down_revision: Union[str, None] = "e38727349f3f" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("item", sa.Column("gameItemId", sa.Text(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("item", "gameItemId") + # ### end Alembic commands ### diff --git a/src/backend_api/app/core/models/models.py b/src/backend_api/app/core/models/models.py index 65435167..0a2bd270 100644 --- a/src/backend_api/app/core/models/models.py +++ b/src/backend_api/app/core/models/models.py @@ -84,7 +84,8 @@ class Item(_ItemBase, Base): # Hypertable # For hypertable specs, see alembic revision `cc29b89156db' __tablename__ = "item" - + # TODO do something about None and make it not nullable + gameItemId: Mapped[str | None] = mapped_column(Text) prefixes: Mapped[int | None] = mapped_column(SmallInteger) suffixes: Mapped[int | None] = mapped_column(SmallInteger) foilVariation: Mapped[int | None] = mapped_column(SmallInteger) diff --git a/src/backend_api/app/core/schemas/item.py b/src/backend_api/app/core/schemas/item.py index 72299c09..6b0179be 100644 --- a/src/backend_api/app/core/schemas/item.py +++ b/src/backend_api/app/core/schemas/item.py @@ -19,6 +19,7 @@ class _BaseItem(_pydantic.BaseModel): itemBaseTypeId: int ilvl: int rarity: str + gameItemId: str | None = None identified: bool = True currencyAmount: float | None = None currencyId: int | None = None diff --git a/src/backend_api/app/core/schemas/plot/input.py b/src/backend_api/app/core/schemas/plot/input.py index f5052ab5..e56b665d 100644 --- a/src/backend_api/app/core/schemas/plot/input.py +++ b/src/backend_api/app/core/schemas/plot/input.py @@ -49,12 +49,14 @@ class PlotQuery(BasePlotQuery): "Plots for items with or without modifiers" wantedModifiers: list[list[WantedModifier]] | None = None + dataPointsPerHour: int = 5 class IdentifiedPlotQuery(BasePlotQuery): "Plots for items with modifiers" wantedModifiers: list[list[WantedModifier]] + dataPointsPerHour: int = 5 class UnidentifiedPlotQuery(BasePlotQuery): diff --git a/src/backend_api/app/core/schemas/plot/output.py b/src/backend_api/app/core/schemas/plot/output.py index 998f3b19..eb6d8fed 100644 --- 
a/src/backend_api/app/core/schemas/plot/output.py +++ b/src/backend_api/app/core/schemas/plot/output.py @@ -1,5 +1,3 @@ -from typing import Literal - import pydantic as _pydantic @@ -7,15 +5,19 @@ class Datum(_pydantic.BaseModel): hoursSinceLaunch: int valueInChaos: float valueInMostCommonCurrencyUsed: float - confidence: Literal["low", "medium", "high"] -class TimeseriesData(_pydantic.BaseModel): - name: str +class LinkedPrices(_pydantic.BaseModel): + gameItemId: str data: list[Datum] - confidenceRating: Literal["low", "medium", "high"] + + +class LeagueData(_pydantic.BaseModel): + league: str + linkedPrices: list[LinkedPrices] | None = None + unlinkedPrices: list[Datum] | None = None class PlotData(_pydantic.BaseModel): mostCommonCurrencyUsed: str - data: list[TimeseriesData] + data: list[LeagueData] diff --git a/src/backend_api/app/plotting/README.md b/src/backend_api/app/plotting/README.md new file mode 100644 index 00000000..404eee7d --- /dev/null +++ b/src/backend_api/app/plotting/README.md @@ -0,0 +1,169 @@ +A potential way to do the data aggregation in pure SQL. 
Not complete yet: the draft query below still builds the older `name`/`confidence`/`confidenceRating` output, not the `LeagueData` shape (`league`, `linkedPrices`, `unlinkedPrices`) defined in `core/schemas/plot/output.py`. + +```sql +WITH "baseQuery" AS +( + SELECT item."itemId" AS "itemId", item."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", item.league AS league, item."itemBaseTypeId" AS "itemBaseTypeId", item."currencyId" AS "currencyId", item."currencyAmount" AS "currencyAmount", currency."tradeName" AS "tradeName", currency."valueInChaos" AS "valueInChaos", currency."createdHoursSinceLaunch" AS "currencyCreatedHoursSinceLaunch" + FROM item JOIN currency ON item."currencyId" = currency."currencyId" + WHERE (item.league = 'Mercenaries') + -- AND (item."createdHoursSinceLaunch" >3000) + AND ( + EXISTS ( + SELECT 1 + FROM item_modifier + WHERE item."itemId" = item_modifier."itemId" AND item_modifier."modifierId" = 2 + ) + ) AND true +), +"mostCommon" AS + ( + SELECT "baseQuery"."tradeName" AS "mostCommonTradeName", count("baseQuery"."tradeName") AS "nameCount" + FROM "baseQuery" GROUP BY "baseQuery".league, "baseQuery"."tradeName" ORDER BY "nameCount" DESC + LIMIT 1 + ), +"mostCommonIds" AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", max("baseQuery"."currencyId") AS "mostCommonCurrencyId" + FROM "baseQuery" + WHERE "baseQuery"."tradeName" = (SELECT "mostCommon"."mostCommonTradeName" FROM "mostCommon") + GROUP BY "baseQuery"."createdHoursSinceLaunch" +), +"mostCommonPrices" AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", min("baseQuery"."valueInChaos") AS "mostCommonValueInChaos", min("baseQuery"."tradeName") AS "mostCommonCurrencyUsed" + FROM "baseQuery" JOIN "mostCommonIds" ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonIds"."createdHoursSinceLaunch" AND "baseQuery"."currencyId" = "mostCommonIds"."mostCommonCurrencyId" + GROUP BY "baseQuery"."createdHoursSinceLaunch" +), +prices AS +( + SELECT "baseQuery"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", "baseQuery".league AS league, "baseQuery"."currencyAmount" * "baseQuery"."valueInChaos" AS "valueInChaos", 
("baseQuery"."currencyAmount" * "baseQuery"."valueInChaos") / CAST("mostCommonPrices"."mostCommonValueInChaos" AS FLOAT(4)) AS "valueInMostCommonCurrencyUsed", "mostCommonPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed" + FROM "baseQuery" JOIN "mostCommonPrices" ON "baseQuery"."createdHoursSinceLaunch" = "mostCommonPrices"."createdHoursSinceLaunch" +), +"rankedPrices" AS +( + SELECT + prices."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", + prices.league AS league, + prices."valueInChaos" AS "valueInChaos", + prices."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed", + prices."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", + rank() OVER (PARTITION BY prices."createdHoursSinceLaunch" ORDER BY prices."valueInChaos" ASC) AS pos + FROM prices +), +"filteredPrices" AS +( + SELECT "rankedPrices"."createdHoursSinceLaunch" AS "createdHoursSinceLaunch", "rankedPrices".league AS league, "rankedPrices"."valueInChaos" AS "valueInChaos", "rankedPrices"."valueInMostCommonCurrencyUsed" AS "valueInMostCommonCurrencyUsed", "rankedPrices"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", CASE WHEN ("rankedPrices".pos < 10) THEN 'low' WHEN ("rankedPrices".pos < 15) THEN 'medium' ELSE 'high' END AS confidence + FROM "rankedPrices" + WHERE "rankedPrices".pos <= 20 + ORDER BY "rankedPrices"."createdHoursSinceLaunch" +), +"jsonReady" AS +( + SELECT + "filteredPrices"."createdHoursSinceLaunch" AS "hoursSinceLaunch", + "filteredPrices".league, + avg("filteredPrices"."valueInChaos") AS "valueInChaos", + avg("filteredPrices"."valueInMostCommonCurrencyUsed") AS "valueInMostCommonCurrencyUsed", + min("filteredPrices"."mostCommonCurrencyUsed") AS "mostCommonCurrencyUsed", + min("filteredPrices".confidence) AS confidence + FROM "filteredPrices" + GROUP BY "filteredPrices"."createdHoursSinceLaunch", "filteredPrices".league + ORDER BY "filteredPrices"."createdHoursSinceLaunch" +), +"overallConfidence" AS +( + SELECT + "jsonReady".league AS "name", + 
"jsonReady".confidence AS "confidenceRating", + RANK() OVER ( + PARTITION BY "jsonReady".league + ORDER BY COUNT("jsonReady".confidence) + ASC + ) + FROM "jsonReady" + GROUP BY + "name", + "confidenceRating" +), +"overallMostCommonCurrencyUsed" AS ( + SELECT + "jsonReady".league AS "name", + "jsonReady"."mostCommonCurrencyUsed" AS "mostCommonCurrencyUsed", + RANK() OVER ( + PARTITION BY "jsonReady".league + ORDER BY COUNT("jsonReady"."mostCommonCurrencyUsed") + ASC + ) + FROM "jsonReady" + GROUP BY + "name", + "mostCommonCurrencyUsed" +), "timeSeriesData" AS ( + SELECT + "jsonReady".league AS "name", + json_agg( + json_build_object( + 'hoursSinceLaunch', "jsonReady"."hoursSinceLaunch", + 'valueInChaos', "jsonReady"."valueInChaos", + 'valueInMostCommonCurrencyUsed', "jsonReady"."valueInMostCommonCurrencyUsed", + 'confidence', "jsonReady"."confidence" + ) + ) AS "data", + "overallConfidence"."confidenceRating" + FROM "jsonReady" JOIN "overallConfidence" ON "jsonReady".league = "overallConfidence".name + GROUP BY "jsonReady".league, "overallConfidence"."confidenceRating" +) + + + +SELECT + json_build_object( + 'mostCommonCurrencyUsed', MIN("overallMostCommonCurrencyUsed"."mostCommonCurrencyUsed"), + 'data', json_agg( + json_build_object( + 'name', "timeSeriesData".name, + 'data', "timeSeriesData".data, + 'confidenceRating', "timeSeriesData"."confidenceRating" + ) + ) + ) +FROM "timeSeriesData" NATURAL JOIN "overallMostCommonCurrencyUsed" + +-- SELECT *, EXP(SUM(LN(multi)) OVER (PARTITION BY league, "createdHoursSinceLaunch", "clusterId" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING)) AS "cumMulti" +-- FROM + +-- ( +-- SELECT *, SUM(is_new_cluster) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos" RANGE UNBOUNDED PRECEDING) AS "clusterId" +-- FROM +-- (SELECT +-- *, +-- "valueInChaos" / "valueInChaosPrev" AS multi, +-- "valueInChaos" / "valueInChaos2Prev" AS multi2, +-- CASE +-- WHEN (("valueInChaos" / "valueInChaosPrev") > 1.05) OR 
(("valueInChaos" / "valueInChaos2Prev") > 1.1) +-- THEN 1 +-- ELSE +-- 0 +-- END AS is_new_cluster +-- FROM( +-- SELECT +-- *, +-- LAG("valueInChaos", CEIL("nPoints"*0.05)::INT, null) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaosPrev", +-- LAG( +-- "valueInChaos", +-- CEIL("nPoints"*0.1)::INT, +-- null +-- ) OVER (PARTITION BY league, "createdHoursSinceLaunch" ORDER BY "valueInChaos") AS "valueInChaos2Prev" +-- -- FROM prices +-- FROM +-- ( +-- SELECT *, +-- COUNT(*) OVER (PARTITION BY league, "createdHoursSinceLaunch") AS "nPoints" +-- FROM prices +-- ) +-- WHERE TRUE +-- -- WHERE league = 'Phrecia' +-- AND "createdHoursSinceLaunch" = 2658 +-- ORDER BY "createdHoursSinceLaunch" +-- ))) +``` diff --git a/src/backend_api/app/plotting/plotter.py b/src/backend_api/app/plotting/plotter.py index 29b101d5..a4c4dbfc 100644 --- a/src/backend_api/app/plotting/plotter.py +++ b/src/backend_api/app/plotting/plotter.py @@ -13,6 +13,7 @@ case, desc, func, + literal, or_, select, ) @@ -429,7 +430,13 @@ def _filter_properties( def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select: start, end = query.start, query.end - statement = self._init_stmt(query, item_model=model_Item, start=start, end=end) + statement = self._init_stmt( + query, + item_model=model_Item, + start=start, + end=end, + query_select_args=[model_Item.gameItemId], + ) statement = self._filter_base_specs( statement, item_model=model_Item, query=query ) @@ -440,15 +447,14 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select: statement = self._filter_properties( statement, query=query, start=start, end=end ) - base_query = statement.cte("baseQuery") most_common = ( select( - base_query.c.tradeName.label("mostCommonTradeName"), - func.count(base_query.c.tradeName).label("nameCount"), + base_query.c["tradeName"].label("mostCommonTradeName"), + func.count(base_query.c["tradeName"]).label("nameCount"), ) - .group_by(base_query.c.league, 
base_query.c.tradeName) + .group_by(base_query.c["league"], base_query.c["tradeName"]) .order_by(desc("nameCount")) # Can use literal_column in complex cases .limit(1) .cte("mostCommon") @@ -456,122 +462,246 @@ def _create_plot_statement(self, query: IdentifiedPlotQuery) -> Select: most_common_ids = ( select( - base_query.c.createdHoursSinceLaunch, - func.max(base_query.c.currencyId).label("mostCommonCurrencyId"), + base_query.c["createdHoursSinceLaunch"], + func.max(base_query.c["currencyId"]).label("mostCommonCurrencyId"), ) .where( - base_query.c.tradeName - == select(most_common.c.mostCommonTradeName).scalar_subquery() + base_query.c["tradeName"] + == select(most_common.c["mostCommonTradeName"]).scalar_subquery() ) - .group_by(base_query.c.createdHoursSinceLaunch) + .group_by(base_query.c["createdHoursSinceLaunch"]) .cte("mostCommonIds") ) most_common_prices = ( select( - base_query.c.createdHoursSinceLaunch, - func.min(base_query.c.valueInChaos).label("mostCommonValueInChaos"), - func.min(base_query.c.tradeName).label("mostCommonCurrencyUsed"), + base_query.c["createdHoursSinceLaunch"], + func.min(base_query.c["valueInChaos"]).label("mostCommonValueInChaos"), + func.min(base_query.c["tradeName"]).label("mostCommonCurrencyUsed"), ) .select_from( base_query.join( most_common_ids, and_( - base_query.c.createdHoursSinceLaunch - == most_common_ids.c.createdHoursSinceLaunch, - base_query.c.currencyId - == most_common_ids.c.mostCommonCurrencyId, + base_query.c["createdHoursSinceLaunch"] + == most_common_ids.c["createdHoursSinceLaunch"], + base_query.c["currencyId"] + == most_common_ids.c["mostCommonCurrencyId"], ), ) ) - .group_by(base_query.c.createdHoursSinceLaunch) + .group_by(base_query.c["createdHoursSinceLaunch"]) .cte("mostCommonPrices") ) prices = ( select( - base_query.c.createdHoursSinceLaunch, - base_query.c.league, - (base_query.c.currencyAmount * base_query.c.valueInChaos).label( + base_query.c["createdHoursSinceLaunch"], + base_query.c["league"], + 
func.coalesce(base_query.c["gameItemId"], "unlinked").label( + "gameItemId" + ), + (base_query.c["currencyAmount"] * base_query.c["valueInChaos"]).label( "valueInChaos" ), ( - base_query.c.currencyAmount - * base_query.c.valueInChaos - / most_common_prices.c.mostCommonValueInChaos + base_query.c["currencyAmount"] + * base_query.c["valueInChaos"] + / most_common_prices.c["mostCommonValueInChaos"] ).label("valueInMostCommonCurrencyUsed"), - most_common_prices.c.mostCommonCurrencyUsed, + most_common_prices.c["mostCommonCurrencyUsed"], ) .select_from(base_query) .join( most_common_prices, - base_query.c.createdHoursSinceLaunch - == most_common_prices.c.createdHoursSinceLaunch, + base_query.c["createdHoursSinceLaunch"] + == most_common_prices.c["createdHoursSinceLaunch"], ) .cte("prices") ) - ranked_prices = select( prices, - func.rank() + func.row_number() .over( - partition_by=prices.c.createdHoursSinceLaunch, - order_by=prices.c.valueInChaos.asc(), + partition_by=prices.c["createdHoursSinceLaunch"], + order_by=prices.c["valueInChaos"].asc(), ) .label("pos"), ).cte("rankedPrices") filtered_prices = ( select( - ranked_prices.c.createdHoursSinceLaunch, - ranked_prices.c.league, - ranked_prices.c.valueInChaos, - ranked_prices.c.valueInMostCommonCurrencyUsed, - ranked_prices.c.mostCommonCurrencyUsed, - case( - (ranked_prices.c.pos < 10, "low"), - (ranked_prices.c.pos < 15, "medium"), - else_="high", - ).label("confidence"), + ranked_prices.c["createdHoursSinceLaunch"], + ranked_prices.c["league"], + ranked_prices.c["gameItemId"], + ranked_prices.c["valueInChaos"], + ranked_prices.c["valueInMostCommonCurrencyUsed"], + ranked_prices.c["mostCommonCurrencyUsed"], ) - .where(ranked_prices.c.pos <= 20) - .order_by(ranked_prices.c.createdHoursSinceLaunch) + .where(ranked_prices.c["pos"] <= query.dataPointsPerHour) + .order_by(ranked_prices.c["createdHoursSinceLaunch"]) .cte("filteredPrices") ) + # overall_most_common_currency_used = ( + 
overall_most_common_currency_used_unordered = ( + select( + literal(0).label("join_variable"), + filtered_prices.c["mostCommonCurrencyUsed"], + func.count().label("currencyCount"), + ) + .group_by(filtered_prices.c["mostCommonCurrencyUsed"]) + .cte("overallMostCommonCurrencyUsedUnordered") + ) + + overall_most_common_currency_used = ( + select(overall_most_common_currency_used_unordered) + .order_by( + overall_most_common_currency_used_unordered.c["currencyCount"].desc() + ) + .limit(1) + .cte("overallMostCommonCurrencyUsed") + ) + + items_per_id = ( + select(filtered_prices.c["gameItemId"], func.count().label("itemCount")) + .group_by(filtered_prices.c["gameItemId"]) + .where(filtered_prices.c["gameItemId"] != "unlinked") + .cte("itemsPerId") + ) + + prices_per_game_item_id = ( + select( + filtered_prices.c["gameItemId"], + func.json_agg( + func.json_build_object( + "hoursSinceLaunch", + filtered_prices.c["createdHoursSinceLaunch"], + "valueInChaos", + filtered_prices.c["valueInChaos"], + "valueInMostCommonCurrencyUsed", + filtered_prices.c["valueInMostCommonCurrencyUsed"], + ) + ).label("data"), + ) + .select_from(filtered_prices) + .join( + items_per_id, + filtered_prices.c["gameItemId"] == items_per_id.c["gameItemId"], + ) + .where( + and_( + filtered_prices.c["gameItemId"] != "unlinked", + items_per_id.c["itemCount"] > 1, + ) + ) + .group_by(filtered_prices.c["gameItemId"]) + .cte("pricesPerGameItemId") + ) + + linked_prices = ( + select( + filtered_prices.c["league"], + func.json_agg( + func.json_build_object( + "gameItemId", + filtered_prices.c["gameItemId"], + "data", + prices_per_game_item_id.c["data"], + ) + ).label("linkedPrices"), + ) + .select_from(prices_per_game_item_id) + .join( + filtered_prices, + prices_per_game_item_id.c["gameItemId"] + == filtered_prices.c["gameItemId"], + ) + .group_by(filtered_prices.c["league"]) + .cte("linkedPrices") + ) + + unlinked_prices = ( + select( + filtered_prices.c["league"], + func.json_agg( + 
func.json_build_object( + "hoursSinceLaunch", + filtered_prices.c["createdHoursSinceLaunch"], + "valueInChaos", + filtered_prices.c["valueInChaos"], + "valueInMostCommonCurrencyUsed", + filtered_prices.c["valueInMostCommonCurrencyUsed"], + ) + ).label("unlinkedPrices"), + ) + .select_from(filtered_prices) + .join( + items_per_id, + filtered_prices.c["gameItemId"] == items_per_id.c["gameItemId"], + isouter=True, + ) + .where( + or_( + filtered_prices.c["gameItemId"] == "unlinked", + items_per_id.c["itemCount"] == 1, + ) + ) + .group_by(filtered_prices.c["league"]) + .cte("unlinkedPrices") + ) + + league_data = ( + select( + literal(0).label("join_variable"), + unlinked_prices.c["league"], + linked_prices.c["linkedPrices"], + unlinked_prices.c["unlinkedPrices"], + ) + .select_from(unlinked_prices) + .join( + linked_prices, + unlinked_prices.c["league"] == linked_prices.c["league"], + full=True, + ) + .cte("leagueData") + ) + final_query = ( select( - filtered_prices.c.createdHoursSinceLaunch.label("hoursSinceLaunch"), - filtered_prices.c.league, - func.avg(filtered_prices.c.valueInChaos).label("valueInChaos"), - func.avg(filtered_prices.c.valueInMostCommonCurrencyUsed).label( - "valueInMostCommonCurrencyUsed" - ), - func.min(filtered_prices.c.mostCommonCurrencyUsed).label( - "mostCommonCurrencyUsed" - ), - func.min(filtered_prices.c.confidence).label("confidence"), + overall_most_common_currency_used.c["mostCommonCurrencyUsed"], + func.json_agg( + func.json_build_object( + "league", + league_data.c["league"], + "linkedPrices", + league_data.c["linkedPrices"], + "unlinkedPrices", + league_data.c["unlinkedPrices"], + ) + ).label("data"), ) - .group_by( - filtered_prices.c.createdHoursSinceLaunch, filtered_prices.c.league + .select_from(overall_most_common_currency_used) + .join( + league_data, + overall_most_common_currency_used.c["join_variable"] + == league_data.c["join_variable"], ) - .order_by(filtered_prices.c.createdHoursSinceLaunch) + 
.group_by(overall_most_common_currency_used.c["mostCommonCurrencyUsed"]) ) return final_query async def _plot_execute(self, db: AsyncSession, *, statement: Select) -> PlotData: result = await self._perform_plot_db_statement(db, statement=statement) - df = self._convert_result_to_df(result) - - if df.empty: + json_result = result.first() + if not json_result: raise PlotQueryDataNotFoundError( query_data=str(statement), function_name=self.plot.__name__, class_name=self.__class__.__name__, ) - - return self._create_plot_data(df) + return json_result def _convert_plot_query_type(self, query: PlotQuery) -> IdentifiedPlotQuery: query_dump = query.model_dump() @@ -588,6 +718,7 @@ async def plot(self, db: AsyncSession, *, query: PlotQuery) -> PlotData: # Logs statement in nice format log_clause = stmt.compile(engine, compile_kwargs={"literal_binds": True}) plot_logger.info(f"{log_clause}") + return await self._plot_execute(db, statement=stmt) @@ -660,7 +791,7 @@ def _create_plot_statement(self, query: UnidentifiedPlotQuery) -> Select: calc_value = select( base_query, - (base_query.c.currencyAmount * base_query.c.valueInChaos).label( + (base_query.c["currencyAmount"] * base_query.c["valueInChaos"]).label( "itemValueInChaos" ), ).cte("calcValue") @@ -670,31 +801,32 @@ def _create_plot_statement(self, query: UnidentifiedPlotQuery) -> Select: func.rank() .over( partition_by=[ - calc_value.c.createdHoursSinceLaunch, - calc_value.c.league, + calc_value.c["createdHoursSinceLaunch"], + calc_value.c["league"], ], - order_by=calc_value.c.itemValueInChaos.asc(), + order_by=calc_value.c["itemValueInChaos"].asc(), ) .label("cheap"), ).cte("rankedCheap") final_query = ( select( - ranked_cheap.c.createdHoursSinceLaunch.label("hoursSinceLaunch"), - ranked_cheap.c.league, - (ranked_cheap.c.currencyAmount * ranked_cheap.c.itemValueInChaos).label( - "valueInChaos" - ), - ranked_cheap.c.currencyAmount.label("valueInMostCommonCurrencyUsed"), - 
ranked_cheap.c.tradeName.label("mostCommonCurrencyUsed"), + ranked_cheap.c["createdHoursSinceLaunch"].label("hoursSinceLaunch"), + ranked_cheap.c["league"], + ( + ranked_cheap.c["currencyAmount"] + * ranked_cheap.c["itemValueInChaos"] + ).label("valueInChaos"), + ranked_cheap.c["currencyAmount"].label("valueInMostCommonCurrencyUsed"), + ranked_cheap.c["tradeName"].label("mostCommonCurrencyUsed"), case( - (ranked_cheap.c.nItems < 10, "low"), - (ranked_cheap.c.nItems < 15, "medium"), + (ranked_cheap.c["nItems"] < 10, "low"), + (ranked_cheap.c["nItems"] < 15, "medium"), else_="high", ).label("confidence"), ) - .where(ranked_cheap.c.cheap == 1) - .order_by(ranked_cheap.c.createdHoursSinceLaunch) + .where(ranked_cheap.c["cheap"] == 1) + .order_by(ranked_cheap.c["createdHoursSinceLaunch"]) ) return final_query diff --git a/src/backend_data_retrieval/data_retrieval_app/external_data_retrieval/transforming_data/transform_poe_api_data.py b/src/backend_data_retrieval/data_retrieval_app/external_data_retrieval/transforming_data/transform_poe_api_data.py index 0d9476a8..df4c2157 100644 --- a/src/backend_data_retrieval/data_retrieval_app/external_data_retrieval/transforming_data/transform_poe_api_data.py +++ b/src/backend_data_retrieval/data_retrieval_app/external_data_retrieval/transforming_data/transform_poe_api_data.py @@ -32,6 +32,7 @@ def _create_item_table( """ self.item_columns = [ "itemId", + "id", "name", "league", "baseType", @@ -144,8 +145,8 @@ def transform_influences(row: pd.DataFrame, influence_columns: list[str]): if "extended.suffixes" in item_df.columns: rename_extended_map["extended.suffixes"] = "suffixes" - if rename_extended_map: - item_df = item_df.rename(columns=rename_extended_map) + rename_map = {**rename_extended_map, "id": "gameItemId"} + item_df = item_df.rename(columns=rename_map) stash_series = item_df["stash"].str.split(" ") currency_series = item_df["note"].str.split(" ") @@ -192,6 +193,7 @@ def item_table_columns_to_not_drop(self) -> set[str]: 
dont_drop_columns = self._item_table_columns_to_not_drop except AttributeError: dont_drop_columns = { + "gameItemId", "name", "league", "itemBaseTypeId",