Skip to content

Commit b210293

Browse files
committed
refactor(core): add fee analysis and extract liquidity analysis into dedicated module
- Add `analysis/fees.py` with structured metrics (fee stats, liquidity, market health)
- Remove obsolete `dataframe.py` (responsibilities migrated to preprocessing/analysis)
- Update `pyproject.toml` to include pandas, seaborn, pytest, and setuptools
- Clean up `.gitignore` and remove IDE artifacts from tracking
1 parent cc0eab9 commit b210293

25 files changed

+1725
-313
lines changed

.gitignore

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# IDE and editor configs
2+
.idea/
3+
*.kate-swp
4+
5+
# Bytecode and Python artifacts
6+
__pycache__/
7+
src/__pycache__/
8+
*.py[cod]
9+
10+
# Notebooks & checkpoints
11+
src/.ipynb_checkpoints/
12+
13+
# Data files
14+
data/
15+
data_*
16+
data_old
17+
dataframe_old.pkl
18+
19+
# Poetry & test caches
20+
poetry.lock
21+
.pytest_cache/
22+
23+
# Project-specific
24+
dataframe*

.idea/JMOrderbookAnalytics.iml

Lines changed: 0 additions & 8 deletions
This file was deleted.

.idea/misc.xml

Lines changed: 0 additions & 4 deletions
This file was deleted.

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ readme = "README.md"
99
python = "^3.11"
1010
matplotlib = "^3.9.2"
1111
jupyter = "^1.1.1"
12+
pandas = "^2.2.3"
13+
seaborn = "^0.13.2"
14+
pytest = "^8.3.5"
15+
setuptools = "^78.1.0"
1216

1317

1418
[build-system]
-3.48 KB
Binary file not shown.
-2.52 KB
Binary file not shown.
-2.51 KB
Binary file not shown.
-7.56 KB
Binary file not shown.

src/analysis/__init__.py

Whitespace-only changes.

src/analysis/fees.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
from typing import Dict, Any
2+
import pandas as pd
3+
import numpy as np
4+
from dataclasses import dataclass
5+
6+
7+
@dataclass
class FeeStatistics:
    """Container for the summary statistics of one fee series.

    Attributes:
        mean: Mean value of the series.
        median: Median value reported for the series.
        std: Standard deviation of the series.
        min: Smallest observed value.
        max: Largest observed value.
        percentiles: Percentile values keyed by a string label.
    """

    mean: float
    median: float
    std: float
    min: float
    max: float
    percentiles: Dict[str, float]
16+
17+
18+
def _summarise_fee_series(df: pd.DataFrame, prefix: str) -> FeeStatistics:
    """Summarise the ``<prefix>_mean`` and ``<prefix>_median`` columns of *df*."""
    means = df[f'{prefix}_mean']
    return FeeStatistics(
        mean=means.mean(),
        # The reported "median" is the average of the ``<prefix>_median``
        # column, not the median of the ``<prefix>_mean`` column.
        # NOTE(review): confirm this is the intended definition.
        median=df[f'{prefix}_median'].mean(),
        std=means.std(),
        min=means.min(),
        max=means.max(),
        percentiles={
            '25': means.quantile(0.25),
            '75': means.quantile(0.75),
            '95': means.quantile(0.95),
        },
    )


def calculate_fee_statistics(df: pd.DataFrame) -> Dict[str, FeeStatistics]:
    """Calculate summary statistics for relative and absolute fees.

    Args:
        df: Frame containing the columns
            ``relative_fees_percentage_mean``,
            ``relative_fees_percentage_median``,
            ``absolute_fees_satoshis_mean`` and
            ``absolute_fees_satoshis_median``.

    Returns:
        A dict with two entries, ``'relative_percentage'`` and
        ``'absolute_satoshis'``, each a ``FeeStatistics`` built from the
        matching pair of columns.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    return {
        'relative_percentage': _summarise_fee_series(
            df, 'relative_fees_percentage'),
        'absolute_satoshis': _summarise_fee_series(
            df, 'absolute_fees_satoshis'),
    }
51+
52+
53+
def calculate_time_based_statistics(df: pd.DataFrame, freq: str = 'D') -> pd.DataFrame:
    """Aggregate fee, liquidity and maker columns per time bucket.

    Rows are grouped with ``pd.Grouper(freq=freq)``, which buckets on the
    frame's index.

    Args:
        df: Frame holding ``relative_fees_percentage_mean``,
            ``absolute_fees_satoshis_mean``, ``total_liquidity`` and
            ``total_unique_makers``.
        freq: Pandas frequency string for the bucket width (default ``'D'``).

    Returns:
        One row per bucket, with two-level columns of
        ``(source column, aggregation name)``.
    """
    aggregations = {
        'relative_fees_percentage_mean': ['mean', 'std', 'count'],
        'absolute_fees_satoshis_mean': ['mean', 'std', 'count'],
        'total_liquidity': ['mean', 'std', 'min', 'max'],
        'total_unique_makers': ['mean', 'min', 'max'],
    }
    time_buckets = pd.Grouper(freq=freq)
    grouped = df.groupby(time_buckets)
    return grouped.agg(aggregations)
61+
62+
63+
# aggregations.py
64+
def compute_fee_ratios(df: pd.DataFrame, window_size: int = 1000) -> pd.DataFrame:
    """Compute the share of relative vs. absolute fee counts, raw and smoothed.

    Args:
        df: Frame with ``relative_fees_count`` and ``absolute_fees_count``.
        window_size: Width of the centered rolling-mean window.

    Returns:
        A frame on ``df``'s index with ``relative_ratio``, ``absolute_ratio``
        and their ``*_smooth`` rolling means.
    """
    relative_count = df['relative_fees_count']
    absolute_count = df['absolute_fees_count']
    combined_count = relative_count + absolute_count

    ratios = pd.DataFrame(index=df.index)
    ratios['relative_ratio'] = relative_count / combined_count
    ratios['absolute_ratio'] = absolute_count / combined_count

    # Smoothed columns are appended after both raw ratios exist.
    for name in ('relative_ratio', 'absolute_ratio'):
        centered = ratios[name].rolling(window=window_size, center=True)
        ratios[f'{name}_smooth'] = centered.mean()

    return ratios
85+
86+
87+
def compute_volume_metrics(df: pd.DataFrame, window_size: int = 1000) -> pd.DataFrame:
    """Compute total fee-count volume and each fee type's share of it.

    Args:
        df: Frame with ``relative_fees_count`` and ``absolute_fees_count``.
        window_size: Width of the centered rolling-mean window.

    Returns:
        A frame on ``df``'s index with ``total_volume``, ``relative_share``,
        ``absolute_share`` and a ``*_smooth`` rolling mean of each.
    """
    relative_count = df['relative_fees_count']
    absolute_count = df['absolute_fees_count']

    volume = pd.DataFrame(index=df.index)
    volume['total_volume'] = relative_count + absolute_count
    volume['relative_share'] = relative_count / volume['total_volume']
    volume['absolute_share'] = absolute_count / volume['total_volume']

    # Snapshot the base column names before smoothed columns are appended.
    base_columns = ('total_volume', 'relative_share', 'absolute_share')
    for name in base_columns:
        centered = volume[name].rolling(window=window_size, center=True)
        volume[f'{name}_smooth'] = centered.mean()

    return volume
107+
108+
109+
# metrics.py
110+
def calculate_liquidity_metrics(df: pd.DataFrame) -> Dict[str, float]:
    """Calculate liquidity-related metrics.

    Args:
        df: Frame with ``total_liquidity`` and ``total_unique_makers``.

    Returns:
        A dict with:
          * ``avg_liquidity`` - mean of ``total_liquidity``.
          * ``liquidity_per_maker`` - mean of liquidity divided by maker
            count, skipping rows where the maker count is zero.
          * ``liquidity_volatility`` - standard deviation divided by mean of
            ``total_liquidity``; NaN when the mean is zero.

    Raises:
        KeyError: If a required column is missing.
    """
    liquidity = df['total_liquidity']
    makers = df['total_unique_makers']
    mean_liquidity = liquidity.mean()

    # A zero maker count would divide to +/-inf and poison the mean; turning
    # those rows into NaN makes ``mean()`` skip them instead.
    nonzero_makers = makers.where(makers != 0)

    # The ratio is undefined for a zero mean; return NaN explicitly rather
    # than dividing by zero.
    if mean_liquidity == 0:
        volatility = float('nan')
    else:
        volatility = liquidity.std() / mean_liquidity

    return {
        'avg_liquidity': mean_liquidity,
        'liquidity_per_maker': (liquidity / nonzero_makers).mean(),
        'liquidity_volatility': volatility,
    }
119+
120+
121+
def calculate_market_health_metrics(df: pd.DataFrame, window: int = 1000) -> Dict[str, float]:
    """Calculate market health indicators.

    Args:
        df: Frame with ``total_unique_makers``,
            ``relative_fees_percentage_mean`` and ``total_liquidity``.
        window: Number of rows in each rolling window used for the two
            stability metrics. Defaults to 1000, the value previously
            hard-coded here.

    Returns:
        A dict with:
          * ``maker_stability`` - mean over rows of rolling std / rolling
            mean of ``total_unique_makers``.
          * ``fee_stability`` - the same ratio for
            ``relative_fees_percentage_mean``.
          * ``market_depth`` - mean of ``total_liquidity`` multiplied by
            ``total_unique_makers``.

        Both stability metrics are NaN when ``df`` has fewer than ``window``
        rows, because no rolling window is ever full.

    Raises:
        KeyError: If a required column is missing.
    """
    makers = df['total_unique_makers']

    # Build each rolling object once and reuse it for both std and mean.
    rolling_makers = makers.rolling(window=window)
    rolling_fees = df['relative_fees_percentage_mean'].rolling(window=window)

    return {
        'maker_stability': (
            rolling_makers.std() / rolling_makers.mean()
        ).mean(),
        'fee_stability': (
            rolling_fees.std() / rolling_fees.mean()
        ).mean(),
        'market_depth': (df['total_liquidity'] * makers).mean(),
    }

0 commit comments

Comments
 (0)