diff --git a/.spice/models/gas_fees/gas_fees_v2.py b/.spice/models/gas_fees/gas_fees_v2.py new file mode 100644 index 0000000..fdada27 --- /dev/null +++ b/.spice/models/gas_fees/gas_fees_v2.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +import os +import json +import math +import numpy as np +import pandas as pd +import xgboost as xgb +from typing import Any +from dataclasses import asdict + +import spec + + +class XGBoostForecaster: + def __init__(self, lookback_size: int, forecast_size: int): + self.lookback_size = lookback_size + self.forecast_size = forecast_size + self.model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000) + + def _make_xgboost_data(self, filled_df: pd.DataFrame) -> np.array: + filled_df.sort_values('ts', inplace=True) + data = filled_df['y'].values + has_covariate = 'covariate' in filled_df.columns + + X, y = [], [] + for i in range(self.lookback_size, len(data) - self.forecast_size): + X.append(data[i-self.lookback_size:i]) + y.append(data[i:i+self.forecast_size]) + if has_covariate: + covariates = filled_df['covariate'].values + X[-1] = np.append(X[-1], covariates[i-self.lookback_size:i]) + + return np.array(X), np.array(y) + + def train(self, filled_df: pd.DataFrame) -> Any: + X, y = self._make_xgboost_data(filled_df) + train_size = int(0.8 * len(X)) + train_X, train_y = X[:train_size], y[:train_size] + test_X, test_y = X[train_size:], y[train_size:] + self.model.fit(train_X, train_y, eval_set=[(test_X, test_y)], verbose=True) + return self.model.evals_result() + + def infer(self, data: pd.DataFrame) -> np.array: + input_window = data['value'][-self.lookback_size:].to_numpy().reshape(-1).astype("float32") + if 'covariate' in data.columns: + covariates = data['covariate'][-self.lookback_size:].to_numpy().reshape(-1).astype("float32") + input_window = np.concatenate([input_window, covariates], axis=None) + + return self.model.predict(input_window.reshape(1, -1)) + + def save_model(self, path: str) -> None: + self.model.save_model(path) + + def load_model(self, path: str) -> None: + self.model.load_model(path) + +def _make_inference_response(predicted: np.ndarray, now: float) -> spec.InferenceResponse: + return spec.InferenceResponse(forecast=[ + spec.InferencePoint(now + i + 1, float(val) if not math.isnan(float(val)) else -1e10, None) + for i, val in enumerate(predicted) + ]) + +def train(context: Any, runtime: Any) -> None: + params = spec.TrainParams(**context) + filled_df = pd.read_parquet(os.getenv('DATA_DIR')) + + forecaster = XGBoostForecaster(params.lookback_size, params.forecast_size) + eval_results = forecaster.train(filled_df) + forecaster.save_model(os.path.join(os.environ['OUTPUT_DIR'], 'model.ubj')) + + runtime.upload(json.dumps({ + 'items': [ + { + 'type': 'html', + 'html': json.dumps(eval_results) + } + ], + }), 'report.json') + +def infer(context: Any, runtime: Any) -> None: + params = spec.InferenceParams(lookback=[spec.InferencePoint(**x) for x in context.pop('lookback')], **context) + forecaster = XGBoostForecaster(params.lookback_size, params.forecast_size) + forecaster.load_model(os.path.join(os.getenv('MODEL_DIR'), params.model_weights_name)) + + prediction = forecaster.infer(pd.DataFrame([asdict(x) for x in params.lookback])) + + resp = _make_inference_response(prediction, params.lookback[-1].timestamp) + runtime.upload(json.dumps(asdict(resp)), 'results.json') diff --git a/.spice/models/gas_fees/model.yml b/.spice/models/gas_fees/model.yml new file mode 100644 index 0000000..1500bc9 --- /dev/null +++ b/.spice/models/gas_fees/model.yml @@ -0,0 +1,13 @@ +family: gas_fees +name: v0.0 +model_type: gasfees_v1 +handler: gas_fees_v2.py +training_entry_point: gas_fees_v2.train +inference_entry_point: gas_fees_v2.infer +training_query: 'WITH counts AS ( SELECT block_number, count(1) as "count" FROM eth.transactions GROUP BY block_number ) SELECT number as "ts", CAST(b.base_fee_per_gas / 1000000000.0 AS DOUBLE) as "y", CAST(c."count" AS DOUBLE) as "covariate" FROM eth.blocks b INNER JOIN counts c ON b.number = c.block_number WHERE b.base_fee_per_gas IS NOT NULL ORDER BY block_number DESC LIMIT 500' +inference_query: 'SELECT number as "ts", CAST(base_fee_per_gas / 1000000000.0 AS DOUBLE) as "y", CAST(transaction_count AS DOUBLE) as "y2" from eth.recent_blocks WHERE base_fee_per_gas IS NOT NULL ORDER BY ts DESC LIMIT 35' +lookback_size: 30 +forecast_size: 1 +metadata: + firecache: false + covariate: true diff --git a/.spice/models/gas_fees/spec.py b/.spice/models/gas_fees/spec.py new file mode 100644 index 0000000..e679a2d --- /dev/null +++ b/.spice/models/gas_fees/spec.py @@ -0,0 +1,79 @@ +from typing import List, Optional +from dataclasses import dataclass, field + +@dataclass +class TrainParams: + model_id: str + input_parquet_cid: str + lookback_size: int + forecast_size: int + epochs: int + metadata: Optional[dict] + runtime: str + compiled_package_cid: str + train_handler: str + +@dataclass +class TrainResponse: + model_weights_name: str + model_weights_cid: str + report_cid: str + +@dataclass +class InferencePoint: + timestamp: float + value: float + covariate: Optional[float] + +@dataclass +class InferenceParams: + lookback: list[InferencePoint] + model_weights_cid: str + model_weights_name: str + lookback_size: int + forecast_size: int + runtime: str + compiled_package_cid: str + inference_handler: str + metadata: Optional[dict] = None + model_id: str = "" + model_type: str = "" + +@dataclass +class InferenceResponse: + forecast: list[InferencePoint] + +@dataclass +class PlotlyGrid: + subplots: any = None + rows: int = None + cols: int = None + +@dataclass +class PlotlyLayout: + title: str + width: int = None + height: int = None + grid: PlotlyGrid = None + yaxis1: any = None + yaxis2: any = None + annotations: list = None + +@dataclass +class HtmlReportItem: + type: str = field(default='html', init=False) + html: str + +@dataclass +class PlotlyReportItem: + type: str = field(default='plotly', init=False) + traces: List[any] + layout: PlotlyLayout + + +ReportItem = HtmlReportItem | PlotlyReportItem + +@dataclass +class Report: + items: list[ReportItem] + diff --git a/.spice/models/uniswap_v3_eth_usdt/model.yml b/.spice/models/uniswap_v3_eth_usdt/model.yml new file mode 100644 index 0000000..6a8e66c --- /dev/null +++ b/.spice/models/uniswap_v3_eth_usdt/model.yml @@ -0,0 +1,28 @@ +family: tf_uniswapv3_eth_usdt +name: v0.0.1 +model_type: tf_uniswapv3_eth_usdt_aggregated +training_query: | + SELECT + block_timestamp as ts, + CASE + WHEN cast(amount1 as double) = 0 THEN NULL + ELSE abs(cast(amount1 as double)/ POWER(10, 6) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 18)), 0)) + END as y + FROM eth.uniswap_v3.event_swaps + WHERE address = '0x11b815efb8f581194ae79006d24e0d814b7697f6' + ORDER BY block_number desc + LIMIT 1000 +inference_query: | + SELECT + block_timestamp as ts, + CASE + WHEN cast(amount1 as double) = 0 THEN NULL + ELSE abs(cast(amount1 as double)/ POWER(10, 6) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 18)), 0)) + END as y + FROM eth.uniswap_v3.recent_event_swaps + WHERE address = '0x11b815efb8f581194ae79006d24e0d814b7697f6' + ORDER BY block_number desc +lookback_size: 50 +forecast_size: 1 +metadata: + aggregate: true diff --git a/.spice/models/uniswap_v3_wbtc_eth/model.yml b/.spice/models/uniswap_v3_wbtc_eth/model.yml new file mode 100644 index 0000000..35c7de2 --- /dev/null +++ b/.spice/models/uniswap_v3_wbtc_eth/model.yml @@ -0,0 +1,29 @@ +family: tf_uniswapv3_wbtc_eth +name: v0.0.1 +model_type: tf_uniswapv3_wbtc_eth_aggregated +training_query: | + SELECT + block_timestamp as ts, + CASE + WHEN abs(cast(amount0 as double) * POWER(10, 8)) = 0 THEN NULL + ELSE abs(cast(amount1 as double)/ POWER(10, 18) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 8)), 0)) + END as y + FROM eth.uniswap_v3.event_swaps + WHERE address = '0x4585fe77225b41b697c938b018e2ac67ac5a20c0' + ORDER BY block_number desc + LIMIT 1000 +inference_query: | + SELECT + block_timestamp as ts, + CASE + WHEN abs(cast(amount0 as double) * POWER(10, 8)) = 0 THEN NULL + ELSE abs(cast(amount1 as double)/ POWER(10, 18) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 8)), 0)) + END as y + FROM eth.uniswap_v3.event_swaps + WHERE address = '0x4585fe77225b41b697c938b018e2ac67ac5a20c0' + ORDER BY block_number desc + LIMIT 100 +lookback_size: 50 +forecast_size: 1 +metadata: + aggregate: true