Skip to content

inital changes for website #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions .spice/models/gas_fees/gas_fees_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python

import os
import json
import math
import numpy as np
import pandas as pd
import xgboost as xgb
from typing import Any
from dataclasses import asdict

import spec


class XGBoostForecaster:
def __init__(self, lookback_size: int, forecast_size: int):
self.lookback_size = lookback_size
self.forecast_size = forecast_size
self.model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000)

def _make_xgboost_data(self, filled_df: pd.DataFrame) -> np.array:
filled_df.sort_values('ts', inplace=True)
data = filled_df['y'].values
has_covariate = 'covariate' in filled_df.columns

X, y = [], []
for i in range(self.lookback_size, len(data) - self.forecast_size):
X.append(data[i-self.lookback_size:i])
y.append(data[i:i+self.forecast_size])
if has_covariate:
covariates = filled_df['covariate'].values
X[-1] = np.append(X[-1], covariates[i-self.lookback_size:i])

return np.array(X), np.array(y)

def train(self, filled_df: pd.DataFrame) -> Any:
X, y = self._make_xgboost_data(filled_df)
train_size = int(0.8 * len(X))
train_X, train_y = X[:train_size], y[:train_size]
test_X, test_y = X[train_size:], y[train_size:]
self.model.fit(train_X, train_y, eval_set=[(test_X, test_y)], verbose=True)
return self.model.evals_result()

def infer(self, data: pd.DataFrame) -> np.array:
input_window = data['value'][-self.lookback_size:].to_numpy().reshape(-1).astype("float32")
if 'covariate' in data.columns:
covariates = data['covariate'][-self.lookback_size:].to_numpy().reshape(-1).astype("float32")
input_window = np.concatenate([input_window, covariates], axis=None)

return self.model.predict(input_window.reshape(1, -1))

def save_model(self, path: str) -> None:
self.model.save_model(path)

def load_model(self, path: str) -> None:
self.model.load_model(path)

def _make_inference_response(predicted: np.ndarray, now: float) -> spec.InferenceResponse:
return spec.InferenceResponse(forecast=[
spec.InferencePoint(now + i + 1, float(val) if not math.isnan(float(val)) else -1e10, None)
for i, val in enumerate(predicted)
])

def train(context: Any, runtime: Any) -> None:
params = spec.TrainParams(**context)
filled_df = pd.read_parquet(os.getenv('DATA_DIR'))

forecaster = XGBoostForecaster(params.lookback_size, params.forecast_size)
eval_results = forecaster.train(filled_df)
forecaster.save_model(os.path.join(os.environ['OUTPUT_DIR'], 'model.ubj'))

runtime.upload(json.dumps({
'items': [
{
'type': 'html',
'html': json.dumps(eval_results)
}
],
}), 'report.json')

def infer(context: Any, runtime: Any) -> None:
params = spec.InferenceParams(lookback=[spec.InferencePoint(**x) for x in context.pop('lookback')], **context)
forecaster = XGBoostForecaster(params.lookback_size, params.forecast_size)
forecaster.load_model(os.path.join(os.getenv('MODEL_DIR'), params.model_weights_name))

prediction = forecaster.infer(pd.DataFrame([asdict(x) for x in params.lookback]))

resp = _make_inference_response(prediction, params.lookback[-1].timestamp)
runtime.upload(json.dumps(asdict(resp)), 'results.json')
13 changes: 13 additions & 0 deletions .spice/models/gas_fees/model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
family: gas_fees
name: v0.0
model_type: gasfees_v1
handler: gas_fees_v2.py
training_entry_point: gas_fees_v2.train
inference_entry_point: gas_fees_v2.infer
training_query: 'WITH counts AS ( SELECT block_number, count(1) as "count" FROM eth.transactions GROUP BY block_number ) SELECT number as "ts", CAST(b.base_fee_per_gas / 1000000000.0 AS DOUBLE) as "y", CAST(c."count" AS DOUBLE) as "covariate" FROM eth.blocks b INNER JOIN counts c ON b.number = c.block_number WHERE b.base_fee_per_gas IS NOT NULL ORDER BY block_number DESC LIMIT 500'
inference_query: 'SELECT number as "ts", CAST(base_fee_per_gas / 1000000000.0 AS DOUBLE) as "y", CAST(transaction_count AS DOUBLE) as "y2" from eth.recent_blocks WHERE base_fee_per_gas IS NOT NULL ORDER BY ts DESC LIMIT 35'
lookback_size: 30
forecast_size: 1
metadata:
firecache: false
covariate: true
79 changes: 79 additions & 0 deletions .spice/models/gas_fees/spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from typing import List, Optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this should be in a library that we provide? I don't think we want everyone that implements a model to have to redefine these.

No need to action now - just thinking out loud. Let's create an issue to track though.

from dataclasses import dataclass, field

@dataclass
class TrainParams:
model_id: str
input_parquet_cid: str
lookback_size: int
forecast_size: int
epochs: int
metadata: Optional[dict]
runtime: str
compiled_package_cid: str
train_handler: str

@dataclass
class TrainResponse:
model_weights_name: str
model_weights_cid: str
report_cid: str

@dataclass
class InferencePoint:
timestamp: float
value: float
covariate: Optional[float]

@dataclass
class InferenceParams:
lookback: list[InferencePoint]
model_weights_cid: str
model_weights_name: str
lookback_size: int
forecast_size: int
runtime: str
compiled_package_cid: str
inference_handler: str
metadata: Optional[dict] = None
model_id: str = ""
model_type: str = ""

@dataclass
class InferenceResponse:
forecast: list[InferencePoint]

@dataclass
class PlotlyGrid:
subplots: any = None
rows: int = None
cols: int = None

@dataclass
class PlotlyLayout:
title: str
width: int = None
height: int = None
grid: PlotlyGrid = None
yaxis1: any = None
yaxis2: any = None
annotations: list = None

@dataclass
class HtmlReportItem:
type: str = field(default='html', init=False)
html: str

@dataclass
class PlotlyReportItem:
type: str = field(default='plotly', init=False)
traces: List[any]
layout: PlotlyLayout


ReportItem = HtmlReportItem | PlotlyReportItem

@dataclass
class Report:
items: list[ReportItem]

28 changes: 28 additions & 0 deletions .spice/models/uniswap_v3_eth_usdt/model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
family: tf_uniswapv3_eth_usdt
name: v0.0.1
model_type: tf_uniswapv3_eth_usdt_aggregated
training_query: |
SELECT
block_timestamp as ts,
CASE
WHEN cast(amount1 as double) = 0 THEN NULL
ELSE abs(cast(amount1 as double)/ POWER(10, 6) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 18)), 0))
END as y
FROM eth.uniswap_v3.event_swaps
WHERE address = '0x11b815efb8f581194ae79006d24e0d814b7697f6'
ORDER BY block_number desc
LIMIT 1000
inference_query: |
SELECT
block_timestamp as ts,
CASE
WHEN cast(amount1 as double) = 0 THEN NULL
ELSE abs(cast(amount1 as double)/ POWER(10, 6) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 18)), 0))
END as y
FROM eth.uniswap_v3.recent_event_swaps
WHERE address = '0x11b815efb8f581194ae79006d24e0d814b7697f6'
ORDER BY block_number desc
lookback_size: 50
forecast_size: 1
metadata:
aggregate: true
29 changes: 29 additions & 0 deletions .spice/models/uniswap_v3_wbtc_eth/model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
family: tf_uniswapv3_wbtc_eth
name: v0.0.1
model_type: tf_uniswapv3_wbtc_eth_aggregated
training_query: |
SELECT
block_timestamp as ts,
CASE
WHEN abs(cast(amount0 as double) * POWER(10, 8)) = 0 THEN NULL
ELSE abs(cast(amount1 as double)/ POWER(10, 18) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 8)), 0))
END as y
FROM eth.uniswap_v3.event_swaps
WHERE address = '0x4585fe77225b41b697c938b018e2ac67ac5a20c0'
ORDER BY block_number desc
LIMIT 1000
inference_query: |
SELECT
block_timestamp as ts,
CASE
WHEN abs(cast(amount0 as double) * POWER(10, 8)) = 0 THEN NULL
ELSE abs(cast(amount1 as double)/ POWER(10, 18) / NULLIF(abs(cast(amount0 as double)/ POWER(10, 8)), 0))
END as y
FROM eth.uniswap_v3.event_swaps
WHERE address = '0x4585fe77225b41b697c938b018e2ac67ac5a20c0'
ORDER BY block_number desc
LIMIT 100
lookback_size: 50
forecast_size: 1
metadata:
aggregate: true